static Processor prologProcessor;
static Processor contentProcessor;
static Processor epilogProcessor;
+static Processor errorProcessor;
static enum XML_Error
doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
if (!parser)
return parser;
processor = prologProcessor;
- XmlInitEncoding(&initEncoding, &encoding);
XmlPrologStateInit(&prologState);
userData = 0;
startElementHandler = 0;
return 0;
}
dataBufEnd = dataBuf + INIT_DATA_BUF_SIZE;
+ if (!XmlInitEncoding(&initEncoding, &encoding, encodingName)) {
+ errorCode = XML_ERROR_UNKNOWN_ENCODING;
+ processor = errorProcessor;
+ }
return parser;
}
}
}
+/* Processor that does not examine its input: it simply hands back the
+   error code already recorded for the parser.  The s, end and nextPtr
+   arguments are accepted only to match the Processor signature and are
+   not used. */
+static enum XML_Error
+errorProcessor(XML_Parser parser,
+               const char *s,
+               const char *end,
+               const char **nextPtr)
+{
+  return errorCode;
+}
+
static enum XML_Error
storeAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata,
const char *ptr, const char *end,
#undef PREFIX
+/* Compare two NUL-terminated strings for equality, treating the ASCII
+   letters a-z as equal to A-Z.  Returns 1 when the strings match and
+   0 otherwise. */
+static
+int streqci(const char *s1, const char *s2)
+{
+  char a;
+  char b;
+  do {
+    a = *s1++;
+    b = *s2++;
+    /* Fold ASCII lower case to upper case before comparing. */
+    if (a >= 'a' && a <= 'z')
+      a -= 'a' - 'A';
+    if (b >= 'a' && b <= 'z')
+      b -= 'a' - 'A';
+    if (a != b)
+      return 0;
+  } while (a != '\0');
+  return 1;
+}
+
static
int initScan(const ENCODING *enc, int state, const char *ptr, const char *end,
const char **nextTokPtr)
return 0;
}
-void XmlInitEncoding(INIT_ENCODING *p, const ENCODING **encPtr)
+/* Select the starting encoding for a parse.
+   name may be null.  A name of ISO-8859-1 or UTF-8 (compared without
+   regard to ASCII case) stores the matching encoding in *encPtr directly
+   and leaves p untouched.  A null name, or UTF-16, fills in p's initial
+   encoding and points *encPtr at it.  Any other name is rejected.
+   Returns 1 on success and 0 for an unrecognised name. */
+int XmlInitEncoding(INIT_ENCODING *p, const ENCODING **encPtr, const char *name)
{
+ if (name != 0) {
+   const ENCODING *fixed = 0;
+   if (streqci(name, "ISO-8859-1"))
+     fixed = &latin1_encoding.enc;
+   else if (streqci(name, "UTF-8"))
+     fixed = &utf8_encoding.enc;
+   if (fixed != 0) {
+     *encPtr = fixed;
+     return 1;
+   }
+   /* UTF-16 is the only remaining accepted name; it continues below. */
+   if (!streqci(name, "UTF-16"))
+     return 0;
+ }
p->initEnc.scanners[XML_PROLOG_STATE] = initScanProlog;
p->initEnc.scanners[XML_CONTENT_STATE] = initScanContent;
p->initEnc.updatePosition = initUpdatePosition;
p->initEnc.minBytesPerChar = 1;
p->encPtr = encPtr;
*encPtr = &(p->initEnc);
+ return 1;
}
static
return 1;
}
-static
-int streq(const char *s1, const char *s2)
-{
- for (; *s1 == *s2; s1++, s2++)
- if (!*s1)
- return 1;
- return 0;
-}
-
static
const ENCODING *findEncoding(const ENCODING *enc, const char *ptr, const char *end)
{
if ('a' <= buf[i] && buf[i] <= 'z')
buf[i] += 'A' - 'a';
}
- if (streq(buf, "UTF-8"))
+ if (streqci(buf, "UTF-8"))
return &utf8_encoding.enc;
- if (streq(buf, "ISO-8859-1"))
+ if (streqci(buf, "ISO-8859-1"))
return &latin1_encoding.enc;
- if (streq(buf, "UTF-16")) {
+ if (streqci(buf, "UTF-16")) {
static const unsigned short n = 1;
if (enc->minBytesPerChar == 2)
return enc;
- if (*(const char *)&n)
- return &little2_encoding;
- else
- return &big2_encoding;
+ return &big2_encoding;
}
return 0;
}
{
int i;
const char *outputDir = 0;
+ const char *encoding = 0;
int useFilemap = 1;
#ifdef _MSC_VER
outputDir = argv[i] + j + 1;
i++;
}
+ else if (argv[i][j] == 'e') {
+   /* -e NAME or -eNAME: remember the encoding name to hand to the parser.
+      This branch is chained with "else" so that once an earlier option
+      has consumed its argument and advanced i, argv[i] (possibly the
+      terminating null entry) is not examined again in this iteration. */
+   if (argv[i][j + 1] == '\0') {
+     if (++i == argc)
+       usage(argv[0]);
+     encoding = argv[i];
+   }
+   else
+     encoding = argv[i] + j + 1;
+   i++;
+ }
else if (argv[i][j] == '\0' && j > 1)
i++;
else
FILE *fp = 0;
char *outName = 0;
int result;
- XML_Parser parser = XML_ParserCreate(0);
+ XML_Parser parser = XML_ParserCreate(encoding);
if (outputDir) {
outName = malloc(strlen(outputDir) + strlen(argv[i]) + 2);
strcpy(outName, outputDir);