--- /dev/null
// Abstract interface to an XML parser object.
//
// Instances are obtained from create() or externalEntityParserCreate() and
// are disposed of through release().  All callbacks are delivered through
// the nested handler interfaces, each of which receives the parser that
// raised the event as its first argument.
class XMLParser {
public:
  // Character type of the names and data passed to handlers.
  typedef char Char;
  // Character type of the strings returned by errorString().
  typedef Char LChar;

  // Receives element notifications.
  // NOTE(review): the layout of `atts` is not visible in this header
  // (presumably a null-terminated name/value list) -- confirm against the
  // implementation.
  class ElementHandler {
  public:
    virtual void startElement(XMLParser &parser,
                              const Char *name,
                              const Char **atts) = 0;
    virtual void endElement(XMLParser &parser, const Char *name) = 0;
  };

  // Receives character data as a pointer plus length (`s`, `len`).
  class CharacterDataHandler {
  public:
    virtual void characterData(XMLParser &parser, const Char *s, int len) = 0;
  };

  // Receives processing instructions as a target/data pair.
  class ProcessingInstructionHandler {
  public:
    virtual void processingInstruction(XMLParser &parser,
                                       const Char *target,
                                       const Char *data) = 0;
  };

  // Receives a pointer plus length for input reported through other().
  // NOTE(review): which constructs are routed here is not visible in this
  // header -- presumably everything no other handler covers; confirm.
  class OtherHandler {
  public:
    virtual void other(XMLParser &parser, const Char *s, int len) = 0;
  };

  // Receives unparsed-entity and notation declarations.
  class DeclHandler {
  public:
    virtual void unparsedEntityDecl(XMLParser &parser,
                                    const Char *entityName,
                                    const Char *base,
                                    const Char *systemId,
                                    const Char *publicId,
                                    const Char *notationName) = 0;
    virtual void notationDecl(XMLParser &parser,
                              const Char *notationName,
                              const Char *base,
                              const Char *systemId,
                              const Char *publicId) = 0;
  };

  // Receives references to external entities.
  // NOTE(review): the meaning of the int result is not visible here
  // (presumably zero signals failure, cf. ERROR_EXTERNAL_ENTITY_HANDLING)
  // -- confirm against the implementation.
  class ExternalEntityRefHandler {
  public:
    virtual int externalEntityRef(XMLParser &parser,
                                  const Char *openEntityNames,
                                  const Char *base,
                                  const Char *systemId,
                                  const Char *publicId) = 0;
  };

  // Converter object handed back by EncodingManager::getEncoding().
  // NOTE(review): presumably maps a byte sequence to a character code;
  // the exact contract is not visible here.
  class Converter {
  public:
    virtual int convert(const char *) = 0;
    virtual void release() = 0;
  };

  // Supplies information about a named encoding: fills the 256-entry `map`
  // and may set `converter`.  The bool result's meaning is presumably
  // "encoding known" -- confirm against the implementation.
  class EncodingManager {
  public:
    virtual bool getEncoding(const Char *name,
                             int map[256],
                             Converter *&converter) = 0;
  };

  // Handler registration.  Note that the first three setters take a
  // pointer while the remaining ones take a reference.
  virtual void setElementHandler(ElementHandler *handler) = 0;
  virtual void setCharacterDataHandler(CharacterDataHandler *handler) = 0;
  virtual void setProcessingInstructionHandler(ProcessingInstructionHandler *handler) = 0;
  virtual void setOtherHandler(OtherHandler &handler) = 0;
  virtual void setDeclHandler(DeclHandler &handler) = 0;
  virtual void setExternalEntityRefHandler(ExternalEntityRefHandler &handler) = 0;
  virtual void setEncodingManager(EncodingManager &manager) = 0;

  // Base string that is passed back as the `base` argument of the
  // declaration and external-entity callbacks (presumably; confirm).
  virtual void setBase(const Char *base) = 0;
  virtual const Char *getBase() = 0;

  // Input is supplied either directly (parse) or through a parser-owned
  // buffer (getBuffer followed by parseBuffer).  `isFinal` marks the last
  // piece of input.
  // NOTE(review): the int return convention is not visible here.
  virtual int parse(const char *s, int len, bool isFinal) = 0;
  virtual char *getBuffer(int len) = 0;
  virtual int parseBuffer(int len, bool isFinal) = 0;

  // Creates a parser for an external entity.
  virtual XMLParser *externalEntityParserCreate(const Char *openEntityNames,
                                                const Char *encoding) = 0;

  // Error codes reported by getErrorCode(); ERROR_NONE is zero and the
  // remaining enumerators follow consecutively.
  enum Error {
    ERROR_NONE,
    ERROR_NO_MEMORY,
    ERROR_SYNTAX,
    ERROR_NO_ELEMENTS,
    ERROR_INVALID_TOKEN,
    ERROR_UNCLOSED_TOKEN,
    ERROR_PARTIAL_CHAR,
    ERROR_TAG_MISMATCH,
    ERROR_DUPLICATE_ATTRIBUTE,
    ERROR_JUNK_AFTER_DOC_ELEMENT,
    ERROR_PARAM_ENTITY_REF,
    ERROR_UNDEFINED_ENTITY,
    ERROR_RECURSIVE_ENTITY_REF,
    ERROR_ASYNC_ENTITY,
    ERROR_BAD_CHAR_REF,
    ERROR_BINARY_ENTITY_REF,
    ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF,
    ERROR_MISPLACED_PI,
    ERROR_UNKNOWN_ENCODING,
    ERROR_INCORRECT_ENCODING,
    ERROR_UNCLOSED_CDATA_SECTION,
    ERROR_EXTERNAL_ENTITY_HANDLING
  };

  // Error and position reporting.
  virtual Error getErrorCode() = 0;
  virtual int getCurrentLineNumber() = 0;
  virtual int getCurrentColumnNumber() = 0;
  virtual long getCurrentByteIndex() = 0;

  // Disposes of the parser; there is no public destructor in this
  // interface.
  virtual void release() = 0;

  // Text for an error code (takes an int, so any Error value converts).
  static const LChar *errorString(int code);
  // Factory for a new parser using the named encoding.
  static XMLParser *create(const Char *encoding);
};
--- /dev/null
+#define CHARSET_MAX 41
+
/*
 * Scan the next token of a MIME-style header value starting at *pp.
 *
 * A token is one of:
 *   - a single ';', '/' or '=' character;
 *   - a double-quoted string (the returned pointer addresses the opening
 *     quote and *pp is left just past the closing quote);
 *   - an atom: a run of other characters, ended by whitespace, '(', ';',
 *     '/', '=', '"' or the end of the input.
 * Whitespace and parenthesised comments (which may nest) are skipped.  A
 * backslash and the character after it are passed over uninterpreted.
 *
 * On success returns a pointer to the first character of the token and
 * leaves *pp on the first character after it.  Returns 0 when no token is
 * available: end of input, an unterminated quoted string or comment, a ')'
 * without a matching '(', or a backslash as the last character.
 */
static
const char *getTok(const char **pp)
{
  /* inComment means one level of nesting; inComment+1 means two levels etc */
  enum { inAtom, inString, init, inComment };
  int state = init;
  const char *tokStart = 0;
  for (;;) {
    switch (**pp) {
    case '\0':
      /* An atom that runs up to the end of the input is still a token;
         *pp stays on the terminator so the next call reports the end. */
      if (state == inAtom)
        return tokStart;
      return 0;
    case ' ':
    case '\r':
    case '\t':
    case '\n':
      if (state == inAtom)
        return tokStart;
      break;
    case '(':
      if (state == inAtom)
        return tokStart;
      /* Open a comment, or go one level deeper in an existing one. */
      if (state != inString)
        state++;
      break;
    case ')':
      if (state > init)
        --state;
      else if (state != inString)
        return 0;               /* ')' with no comment open */
      break;
    case ';':
    case '/':
    case '=':
      if (state == inAtom)
        return tokStart;
      /* Single-character token: return it and step past it. */
      if (state == init)
        return (*pp)++;
      break;
    case '\\':
      ++*pp;
      if (**pp == '\0')
        return 0;
      break;
    case '"':
      switch (state) {
      case inString:
        ++*pp;
        return tokStart;
      case inAtom:
        return tokStart;
      case init:
        tokStart = *pp;
        state = inString;
        break;
      default:
        /* inside a comment: quotes carry no meaning */
        break;
      }
      break;
    default:
      if (state == init) {
        tokStart = *pp;
        state = inAtom;
      }
      break;
    }
    ++*pp;
  }
  /* not reached */
}
+
/*
 * Compare the token [start, end) with key, ignoring the case of ASCII
 * letters in the token.
 *
 * key must be a NUL-terminated string whose letters are lowercase ASCII.
 * start may be a null pointer (no token), in which case nothing matches.
 *
 * Returns 1 if the token is exactly as long as key and matches it,
 * 0 otherwise.
 */
static
int matchkey(const char *start, const char *end, const char *key)
{
  if (!start)
    return 0;
  for (; start != end; start++, key++) {
    /* Token longer than key: stop before stepping past key's terminator. */
    if (*key == '\0')
      return 0;
    if (*start == *key)
      continue;
    /* Only letters have an uppercase form; folding any other key
       character would let unrelated bytes compare equal. */
    if (*key >= 'a' && *key <= 'z' && *start == 'A' + (*key - 'a'))
      continue;
    return 0;
  }
  return *key == '\0';
}
+
+void getXMLCharset(const char *buf, char *charset)
+{
+ const char *next, *p;
+
+ charset[0] = '\0';
+ next = buf;
+ p = getTok(&next);
+ if (matchkey(p, next, "text"))
+ strcpy(charset, "us-ascii");
+ else if (!matchkey(p, next, "application"))
+ return;
+ p = getTok(&next);
+ if (!p || *p != '/')
+ return;
+ p = getTok(&next);
+ if (matchkey(p, next, "xml"))
+ isXml = 1;
+ p = getTok(&next);
+ while (p) {
+ if (*p == ';') {
+ p = getTok(&next);
+ if (matchkey(p, next, "charset")) {
+ p = getTok(&next);
+ if (p && *p == '=') {
+ p = getTok(&next);
+ if (p) {
+ char *s = charset;
+ if (*p == '"') {
+ while (++p != next - 1) {
+ if (*p == '\\')
+ ++p;
+ if (s == charset + CHARSET_MAX - 1) {
+ charset[0] = '\0';
+ break;
+ }
+ *s++ = *p;
+ }
+ *s++ = '\0';
+ }
+ else {
+ if (next - p > CHARSET_MAX - 1)
+ break;
+ while (p != next)
+ *s++ = *p++;
+ *s = 0;
+ break;
+ }
+ }
+ }
+ }
+ }
+ else
+ p = getTok(&next);
+ }
+}
+
+int main(int argc, char **argv)
+{
+ char buf[CHARSET_MAX];
+ getXMLCharset(argv[1], buf);
+ printf("charset = \"%s\"\n", buf);
+ return 0;
+}