From: James Clark Date: Mon, 2 Feb 1998 12:40:34 +0000 (+0000) Subject: Initial Revision X-Git-Tag: REC1_0~29 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=347e19a6586bfafbbf76186e47758ec0fad3d731;p=libexpat Initial Revision --- diff --git a/expat/xmlparse/xmlparse.c b/expat/xmlparse/xmlparse.c new file mode 100755 index 00000000..d70c361d --- /dev/null +++ b/expat/xmlparse/xmlparse.c @@ -0,0 +1,1372 @@ + +/* FIXME + +Normalize tokenized attribute values. + +Do attribute defaulting. + +*/ + + +#include "xmlparse.h" +#include "xmltok.h" +#include "xmlrole.h" +#include "hashtable.h" + +#include +#include + +typedef struct { + const char *name; + const char *textPtr; + size_t textLen; + const char *systemId; + const char *publicId; + const char *notation; + char open; + char magic; +} ENTITY; + +#define INIT_BLOCK_SIZE 1024 + +typedef struct block { + struct block *next; + int size; + char s[1]; +} BLOCK; + +typedef struct { + BLOCK *blocks; + BLOCK *freeBlocks; + const char *end; + char *ptr; + char *start; +} STRING_POOL; + +typedef struct { + HASH_TABLE generalEntities; + HASH_TABLE paramEntities; + STRING_POOL pool; + int containsRef; + int standalone; + char *groupConnector; + size_t groupSize; +} DTD; + +typedef enum XML_Error Processor(XML_Parser parser, + const char *start, + const char *end, + const char **endPtr); + +static Processor prologProcessor; +static Processor contentProcessor; +static Processor epilogProcessor; + +static +int doContent(XML_Parser parser, + int startTagLevel, + const ENCODING *enc, + const char *start, + const char *end, + const char **endPtr); + +static enum XML_Error +checkGeneralTextEntity(XML_Parser parser, + const char *s, const char *end, + const char **nextPtr, + const ENCODING **enc); +static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *, const char *s); +static enum XML_Error +storeAttributeValue(XML_Parser parser, const ENCODING *, const char *, const char *, + STRING_POOL *); +static 
enum XML_Error +checkAttributeUniqueness(XML_Parser parser, const ENCODING *enc, int nAtts); +static enum XML_Error +storeEntityValue(XML_Parser parser, const char *start, const char *end); +static int +reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); + +static +const char *pushTag(XML_Parser parser, const ENCODING *enc, const char *rawName); + + +static int dtdInit(DTD *); +static void dtdDestroy(DTD *); +static void poolInit(STRING_POOL *); +static void poolClear(STRING_POOL *); +static void poolDestroy(STRING_POOL *); +static const char *poolAppend(STRING_POOL *pool, const ENCODING *enc, + const char *ptr, const char *end); +static const char *poolStoreString(STRING_POOL *pool, const ENCODING *enc, + const char *ptr, const char *end); +static int poolGrow(STRING_POOL *pool); + +#define poolStart(pool) ((pool)->start) +#define poolEnd(pool) ((pool)->ptr) +#define poolLength(pool) ((pool)->ptr - (pool)->start) +#define poolDiscard(pool) ((pool)->ptr = (pool)->start) +#define poolFinish(pool) ((pool)->start = (pool)->ptr) +#define poolAppendByte(pool, c) \ + (((pool)->ptr == (pool)->end && !poolGrow(pool)) \ + ? 
0 \ + : ((*((pool)->ptr)++ = c), 1)) + +typedef struct { + char *buffer; + /* first character to be parsed */ + const char *bufferPtr; + /* past last character to be parsed */ + char *bufferEnd; + /* allocated end of buffer */ + const char *bufferLim; + long bufferEndByteIndex; + void *userData; + XML_StartElementHandler startElementHandler; + XML_EndElementHandler endElementHandler; + XML_CharacterDataHandler characterDataHandler; + XML_ProcessingInstructionHandler processingInstructionHandler; + const ENCODING *encoding; + INIT_ENCODING initEncoding; + PROLOG_STATE prologState; + Processor *processor; + enum XML_Error errorCode; + const char *errorPtr; + int tagLevel; + ENTITY *declEntity; + DTD dtd; + char *tagStack; + char *tagStackPtr; + const char *tagStackEnd; + int attsSize; + ATTRIBUTE *atts; + POSITION position; + long errorByteIndex; + STRING_POOL tempPool; +} Parser; + +#define userData (((Parser *)parser)->userData) +#define startElementHandler (((Parser *)parser)->startElementHandler) +#define endElementHandler (((Parser *)parser)->endElementHandler) +#define characterDataHandler (((Parser *)parser)->characterDataHandler) +#define processingInstructionHandler (((Parser *)parser)->processingInstructionHandler) +#define encoding (((Parser *)parser)->encoding) +#define initEncoding (((Parser *)parser)->initEncoding) +#define prologState (((Parser *)parser)->prologState) +#define processor (((Parser *)parser)->processor) +#define errorCode (((Parser *)parser)->errorCode) +#define errorPtr (((Parser *)parser)->errorPtr) +#define errorByteIndex (((Parser *)parser)->errorByteIndex) +#define position (((Parser *)parser)->position) +#define tagLevel (((Parser *)parser)->tagLevel) +#define buffer (((Parser *)parser)->buffer) +#define bufferPtr (((Parser *)parser)->bufferPtr) +#define bufferEnd (((Parser *)parser)->bufferEnd) +#define bufferEndByteIndex (((Parser *)parser)->bufferEndByteIndex) +#define bufferLim (((Parser *)parser)->bufferLim) +#define dtd 
(((Parser *)parser)->dtd) +#define declEntity (((Parser *)parser)->declEntity) +#define tagStackEnd (((Parser *)parser)->tagStackEnd) +#define tagStackPtr (((Parser *)parser)->tagStackPtr) +#define tagStack (((Parser *)parser)->tagStack) +#define atts (((Parser *)parser)->atts) +#define attsSize (((Parser *)parser)->attsSize) +#define tempPool (((Parser *)parser)->tempPool) + +XML_Parser XML_ParserCreate(const char *encodingName) +{ + XML_Parser parser = malloc(sizeof(Parser)); + if (!parser) + return parser; + processor = prologProcessor; + XmlInitEncoding(&initEncoding, &encoding); + XmlPrologStateInit(&prologState); + userData = 0; + startElementHandler = 0; + endElementHandler = 0; + characterDataHandler = 0; + processingInstructionHandler = 0; + buffer = 0; + bufferPtr = 0; + bufferEnd = 0; + bufferEndByteIndex = 0; + bufferLim = 0; + memset(&position, 0, sizeof(POSITION)); + errorCode = XML_ERROR_NONE; + errorByteIndex = 0; + errorPtr = 0; + tagLevel = 0; + tagStack = malloc(1024); + tagStackPtr = tagStack; + attsSize = 1024; + atts = malloc(attsSize * sizeof(ATTRIBUTE)); + poolInit(&tempPool); + if (!dtdInit(&dtd) || !atts || !tagStack) { + XML_ParserFree(parser); + return 0; + } + tagStackEnd = tagStack + 1024; + *tagStackPtr++ = '\0'; + return parser; +} + +void XML_ParserFree(XML_Parser parser) +{ + poolDestroy(&tempPool); + dtdDestroy(&dtd); + free((void *)tagStack); + free((void *)atts); + free(buffer); + free(parser); +} + +void XML_SetUserData(XML_Parser parser, void *p) +{ + userData = p; +} + +void XML_SetElementHandler(XML_Parser parser, + XML_StartElementHandler start, + XML_EndElementHandler end) +{ + startElementHandler = start; + endElementHandler = end; +} + +void XML_SetCharacterDataHandler(XML_Parser parser, + XML_CharacterDataHandler handler) +{ + characterDataHandler = handler; +} + +void XML_SetProcessingInstructionHandler(XML_Parser parser, + XML_ProcessingInstructionHandler handler) +{ + processingInstructionHandler = handler; +} + +int 
XML_Parse(XML_Parser parser, const char *s, size_t len, int isFinal) +{ + bufferEndByteIndex += len; + if (len == 0) { + if (!isFinal) + return 1; + errorCode = processor(parser, bufferPtr, bufferEnd, 0); + return errorCode == XML_ERROR_NONE; + } + else if (bufferPtr == bufferEnd) { + const char *end; + int nLeftOver; + if (isFinal) { + errorCode = processor(parser, s, s + len, 0); + if (errorCode == XML_ERROR_NONE) + return 1; + if (errorPtr) { + errorByteIndex = bufferEndByteIndex - (s + len - errorPtr); + XmlUpdatePosition(encoding, s, errorPtr, &position); + } + return 0; + } + errorCode = processor(parser, s, s + len, &end); + if (errorCode != XML_ERROR_NONE) { + if (errorPtr) { + errorByteIndex = bufferEndByteIndex - (s + len - errorPtr); + XmlUpdatePosition(encoding, s, errorPtr, &position); + } + return 0; + } + XmlUpdatePosition(encoding, s, end, &position); + nLeftOver = s + len - end; + if (nLeftOver) { + if (buffer == 0 || nLeftOver > bufferLim - buffer) { + /* FIXME avoid integer overflow */ + buffer = realloc(buffer, len * 2); + if (!buffer) { + errorCode = XML_ERROR_NO_MEMORY; + return 0; + } + bufferLim = buffer + len * 2; + } + memcpy(buffer, end, nLeftOver); + bufferPtr = buffer; + bufferEnd = buffer + nLeftOver; + } + return 1; + } + else { + memcpy(XML_GetBuffer(parser, len), s, len); + return XML_ParseBuffer(parser, len, isFinal); + } +} + +int XML_ParseBuffer(XML_Parser parser, size_t len, int isFinal) +{ + const char *start = bufferPtr; + bufferEnd += len; + errorCode = processor(parser, bufferPtr, bufferEnd, + isFinal ? 
(const char **)0 : &bufferPtr); + if (errorCode == XML_ERROR_NONE) { + if (!isFinal) + XmlUpdatePosition(encoding, start, bufferPtr, &position); + return 1; + } + else { + if (errorPtr) { + errorByteIndex = bufferEndByteIndex - (bufferEnd - errorPtr); + XmlUpdatePosition(encoding, start, errorPtr, &position); + } + return 0; + } +} + +void *XML_GetBuffer(XML_Parser parser, size_t len) +{ + if (len > bufferLim - bufferEnd) { + /* FIXME avoid integer overflow */ + int neededSize = len + (bufferEnd - bufferPtr); + if (neededSize <= bufferLim - buffer) { + memmove(buffer, bufferPtr, bufferEnd - bufferPtr); + bufferEnd = buffer + (bufferEnd - bufferPtr); + bufferPtr = buffer; + } + else { + char *newBuf; + size_t bufferSize = bufferLim - bufferPtr; + do { + bufferSize *= 2; + } while (bufferSize < neededSize); + newBuf = malloc(bufferSize); + if (newBuf == 0) { + errorCode = XML_ERROR_NO_MEMORY; + return 0; + } + bufferLim = newBuf + bufferSize; + memcpy(newBuf, bufferPtr, bufferEnd - bufferPtr); + bufferEnd = newBuf + (bufferEnd - bufferPtr); + bufferPtr = buffer = newBuf; + } + } + return bufferEnd; +} + +int XML_GetErrorCode(XML_Parser parser) +{ + return errorCode; +} + +int XML_GetErrorLineNumber(XML_Parser parser) +{ + return position.lineNumber; +} + +int XML_GetErrorColumnNumber(XML_Parser parser) +{ + return position.columnNumber; +} + +long XML_GetErrorByteIndex(XML_Parser parser) +{ + return errorByteIndex; +} + +const char *XML_ErrorString(int code) +{ + static const char *message[] = { + 0, + "out of memory", + "syntax error", + "no element found", + "not well-formed", + "unclosed token", + "unclosed token", + "mismatched tag", + "duplicate attribute", + "junk after document element", + "parameter entity reference not allowed within declaration in internal subset", + "undefined entity", + "recursive entity reference", + "asynchronous entity", + "reference to invalid character number", + "reference to binary entity", + "reference to external entity in 
attribute", + "xml processing instruction not at start of external entity", + "unknown encoding", + "encoding specified in XML declaration is incorrect" + }; + if (code > 0 && code < sizeof(message)/sizeof(message[0])) + return message[code]; + return 0; +} + +static +enum XML_ERROR contentProcessor(XML_Parser parser, + const char *start, + const char *end, + const char **endPtr) +{ + return doContent(parser, 0, encoding, start, end, endPtr); +} + +static enum XML_Error +doContent(XML_Parser parser, + int startTagLevel, + const ENCODING *enc, + const char *s, + const char *end, + const char **nextPtr) +{ + static const char *nullPtr = 0; + const ENCODING *utf8 = XmlGetInternalEncoding(XML_UTF8_ENCODING); + for (;;) { + const char **appAtts; + const char *next; + int tok = XmlContentTok(enc, s, end, &next); + switch (tok) { + case XML_TOK_TRAILING_CR: + case XML_TOK_NONE: + if (nextPtr) { + *nextPtr = s; + return XML_ERROR_NONE; + } + if (startTagLevel > 0) { + if (tagLevel != startTagLevel) { + errorPtr = s; + return XML_ERROR_ASYNC_ENTITY; + } + return XML_ERROR_NONE; + } + errorPtr = s; + return XML_ERROR_NO_ELEMENTS; + case XML_TOK_INVALID: + errorPtr = next; + return XML_ERROR_INVALID_TOKEN; + case XML_TOK_PARTIAL: + if (nextPtr) { + *nextPtr = s; + return XML_ERROR_NONE; + } + errorPtr = s; + return XML_ERROR_UNCLOSED_TOKEN; + case XML_TOK_PARTIAL_CHAR: + if (nextPtr) { + *nextPtr = s; + return XML_ERROR_NONE; + } + errorPtr = s; + return XML_ERROR_PARTIAL_CHAR; + case XML_TOK_ENTITY_REF: + { + const char *name = poolStoreString(&dtd.pool, enc, + s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + ENTITY *entity; + if (!name) + return XML_ERROR_NO_MEMORY; + entity = (ENTITY *)lookup(&dtd.generalEntities, name, 0); + poolDiscard(&dtd.pool); + if (!entity) { + if (!dtd.containsRef || dtd.standalone) { + errorPtr = s; + return XML_ERROR_UNDEFINED_ENTITY; + } + break; + } + if (entity->magic) { + if (characterDataHandler) + characterDataHandler(userData, 
entity->textPtr, entity->textLen); + break; + } + if (entity->open) { + errorPtr = s; + return XML_ERROR_RECURSIVE_ENTITY_REF; + } + if (entity->notation) { + errorPtr = s; + return XML_ERROR_BINARY_ENTITY_REF; + } + if (entity) { + if (entity->textPtr) { + enum XML_Error result; + entity->open = 1; + result = doContent(parser, + tagLevel, + utf8, + entity->textPtr, + entity->textPtr + entity->textLen, + 0); + entity->open = 0; + if (result) { + errorPtr = s; + return result; + } + } + } + break; + } + case XML_TOK_START_TAG_WITH_ATTS: + { + const char *name; + enum XML_Error result = storeAtts(parser, enc, s); + if (result) + return result; + name = pushTag(parser, enc, s + enc->minBytesPerChar); + if (!name) + return XML_ERROR_NO_MEMORY; + ++tagLevel; + if (startElementHandler) { + startElementHandler(userData, name, (const char **)atts); + poolClear(&tempPool); + } + break; + } + case XML_TOK_START_TAG_NO_ATTS: + { + const char *name = pushTag(parser, enc, s + enc->minBytesPerChar); + ++tagLevel; + if (!name) + return XML_ERROR_NO_MEMORY; + if (startElementHandler) + startElementHandler(userData, name, &nullPtr); + break; + } + case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: + { + enum XML_Error result = storeAtts(parser, enc, s); + if (result) + return result; + appAtts = (const char **)atts; + goto emptyElement; + } + case XML_TOK_EMPTY_ELEMENT_NO_ATTS: + appAtts = &nullPtr; + emptyElement: + if (startElementHandler || endElementHandler) { + const char *nameStart = s + enc->minBytesPerChar; + const char *name = poolStoreString(&tempPool, enc, nameStart, + nameStart + + XmlNameLength(enc, nameStart)); + if (!name) + return XML_ERROR_NO_MEMORY; + if (startElementHandler) + startElementHandler(userData, name, appAtts); + if (endElementHandler) + endElementHandler(userData, name); + poolClear(&tempPool); + } + if (tagLevel == 0) + return epilogProcessor(parser, next, end, nextPtr); + break; + case XML_TOK_END_TAG: + if (tagLevel == startTagLevel) { + errorPtr = s; + return 
XML_ERROR_ASYNC_ENTITY; + } + else { + const char *rawNameStart = s + enc->minBytesPerChar * 2; + const char *nameEnd; + const char *name = poolStoreString(&tempPool, enc, rawNameStart, + rawNameStart + + XmlNameLength(enc, rawNameStart)); + if (!name) + return XML_ERROR_NO_MEMORY; + nameEnd = poolEnd(&tempPool); + for (;;) { + --nameEnd; + --tagStackPtr; + if (nameEnd == name) { + if (tagStackPtr[-1] == '\0') + break; + return XML_ERROR_TAG_MISMATCH; + } + if (*nameEnd != *tagStackPtr) + return XML_ERROR_TAG_MISMATCH; + } + --tagLevel; + if (endElementHandler) + endElementHandler(userData, name); + poolDiscard(&tempPool); + if (tagLevel == 0) + return epilogProcessor(parser, next, end, nextPtr); + } + break; + case XML_TOK_CHAR_REF: + { + int n = XmlCharRefNumber(enc, s); + if (n < 0) { + errorPtr = s; + return XML_ERROR_BAD_CHAR_REF; + } + if (characterDataHandler) { + char buf[XML_MAX_BYTES_PER_CHAR]; + characterDataHandler(userData, buf, XmlEncode(utf8, n, buf)); + } + } + break; + case XML_TOK_XML_DECL: + errorPtr = s; + return XML_ERROR_MISPLACED_XML_PI; + case XML_TOK_DATA_NEWLINE: + if (characterDataHandler) { + char c = '\n'; + characterDataHandler(userData, &c, 1); + } + break; + case XML_TOK_CDATA_SECTION: + if (characterDataHandler) { + if (!poolAppend(&tempPool, + enc, + s + enc->minBytesPerChar * 9, + next - enc->minBytesPerChar * 3)) + return XML_ERROR_NO_MEMORY; + characterDataHandler(userData, poolStart(&tempPool), poolLength(&tempPool)); + poolDiscard(&tempPool); + } + break; + case XML_TOK_DATA_CHARS: + if (characterDataHandler) { + /* FIXME Do this efficiently */ + if (!poolAppend(&tempPool, enc, s, next)) + return XML_ERROR_NO_MEMORY; + characterDataHandler(userData, poolStart(&tempPool), poolLength(&tempPool)); + poolDiscard(&tempPool); + } + break; + case XML_TOK_PI: + if (!reportProcessingInstruction(parser, enc, s, next)) + return XML_ERROR_NO_MEMORY; + break; + } + s = next; + } + /* not reached */ +} + +static +const char 
*pushTag(XML_Parser parser, const ENCODING *enc, const char *rawName) +{ + const char *name = tagStackPtr; + const char *rawNameEnd = rawName + XmlNameLength(enc, rawName); + for (;;) { + char *newStack; + size_t newSize; + XmlConvert(enc, XML_UTF8_ENCODING, + &rawName, rawNameEnd, + &tagStackPtr, tagStackEnd - 1); + if (rawName == rawNameEnd) + break; + newSize = (tagStackEnd - tagStack) >> 1; + newStack = realloc(tagStack, newSize); + if (!newStack) + return 0; + tagStackEnd = newStack + newSize; + tagStackPtr = newStack + (tagStackPtr - tagStack); + name = newStack + (name - tagStack); + tagStack = newStack; + } + *tagStackPtr++ = '\0'; + return name; +} + +static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc, const char *s) +{ + const char **appAtts = 0; + int i; + int n = XmlGetAttributes(enc, s, attsSize, atts); + if (n > attsSize) { + attsSize = 2*n; + atts = realloc((void *)atts, attsSize * sizeof(ATTRIBUTE)); + if (!atts) + return XML_ERROR_NO_MEMORY; + XmlGetAttributes(enc, s, n, atts); + } + if (n > 1) { + enum XML_Error result = checkAttributeUniqueness(parser, enc, n); + if (result) + return result; + } + if (startElementHandler) + appAtts = (const char **)atts; + for (i = 0; i < n; i++) { + if (appAtts) { + appAtts[i << 1] = poolStoreString(&tempPool, enc, + atts[i].name, + atts[i].name + + XmlNameLength(enc, atts[i].name)); + if (appAtts[i << 1] == 0) + return XML_ERROR_NO_MEMORY; + poolFinish(&tempPool); + } + if (!atts[i].normalized) { + enum XML_Error result + = storeAttributeValue(parser, enc, + atts[i].valuePtr, + atts[i].valueEnd, + &tempPool); + if (result) + return result; + if (!poolAppendByte(&tempPool, '\0')) + return XML_ERROR_NO_MEMORY; + if (appAtts) { + appAtts[(i << 1) + 1] = poolStart(&tempPool); + poolFinish(&tempPool); + } + else + poolDiscard(&tempPool); + } + else if (appAtts) { + appAtts[(i << 1) + 1] = poolStoreString(&tempPool, enc, atts[i].valuePtr, atts[i].valueEnd); + if (appAtts[(i << 1) + 1] == 0) + 
return XML_ERROR_NO_MEMORY; + poolFinish(&tempPool); + } + } + if (appAtts) + appAtts[n << 1] = 0; + return XML_ERROR_NONE; +} + +static enum XML_Error +prologProcessor(XML_Parser parser, + const char *s, + const char *end, + const char **nextPtr) +{ + for (;;) { + const char *next; + int tok = XmlPrologTok(encoding, s, end, &next); + if (tok <= 0) { + if (nextPtr != 0 && tok != XML_TOK_INVALID) { + *nextPtr = s; + return XML_ERROR_NONE; + } + switch (tok) { + case XML_TOK_INVALID: + errorPtr = next; + return XML_ERROR_INVALID_TOKEN; + case XML_TOK_NONE: + return XML_ERROR_NO_ELEMENTS; + case XML_TOK_PARTIAL: + return XML_ERROR_UNCLOSED_TOKEN; + case XML_TOK_PARTIAL_CHAR: + return XML_ERROR_PARTIAL_CHAR; + case XML_TOK_TRAILING_CR: + errorPtr = s + encoding->minBytesPerChar; + return XML_ERROR_NO_ELEMENTS; + default: + abort(); + } + } + switch (XmlTokenRole(&prologState, tok, s, next, encoding)) { + case XML_ROLE_XML_DECL: + { + const char *encodingName = 0; + const ENCODING *newEncoding = 0; + const char *version; + int standalone = -1; + if (!XmlParseXmlDecl(0, + encoding, + s, + next, + &errorPtr, + &version, + &encodingName, + &newEncoding, + &standalone)) + return XML_ERROR_SYNTAX; + if (newEncoding) { + if (newEncoding->minBytesPerChar != encoding->minBytesPerChar) { + errorPtr = encodingName; + return XML_ERROR_INCORRECT_ENCODING; + } + encoding = newEncoding; + } + else if (encodingName) { + errorPtr = encodingName; + return XML_ERROR_UNKNOWN_ENCODING; + } + if (standalone == 1) + dtd.standalone = 1; + break; + } + case XML_ROLE_DOCTYPE_SYSTEM_ID: + dtd.containsRef = 1; + break; + case XML_ROLE_DOCTYPE_PUBLIC_ID: + case XML_ROLE_ENTITY_PUBLIC_ID: + case XML_ROLE_NOTATION_PUBLIC_ID: + if (!XmlIsPublicId(encoding, s, next, &errorPtr)) + return XML_ERROR_SYNTAX; + break; + case XML_ROLE_INSTANCE_START: + processor = contentProcessor; + return contentProcessor(parser, s, end, nextPtr); + case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE: + case 
XML_ROLE_FIXED_ATTRIBUTE_VALUE: + { + enum XML_Error result + = storeAttributeValue(parser, encoding, s + encoding->minBytesPerChar, + next - encoding->minBytesPerChar, + &dtd.pool); + if (result) + return result; + poolFinish(&dtd.pool); + break; + } + case XML_ROLE_ENTITY_VALUE: + { + enum XML_Error result = storeEntityValue(parser, s, next); + if (result != XML_ERROR_NONE) + return result; + } + break; + case XML_ROLE_ENTITY_SYSTEM_ID: + if (declEntity) { + declEntity->systemId = poolStoreString(&dtd.pool, encoding, + s + encoding->minBytesPerChar, + next - encoding->minBytesPerChar); + if (!declEntity->systemId) + return XML_ERROR_NO_MEMORY; + poolFinish(&dtd.pool); + } + break; + case XML_ROLE_PARAM_ENTITY_REF: + { + const char *name = poolStoreString(&dtd.pool, encoding, + s + encoding->minBytesPerChar, + next - encoding->minBytesPerChar); + ENTITY *entity; + if (!name) + return XML_ERROR_NO_MEMORY; + entity = (ENTITY *)lookup(&dtd.paramEntities, name, 0); + poolDiscard(&dtd.pool); + if (!entity) { + if (!dtd.containsRef || dtd.standalone) { + errorPtr = s; + return XML_ERROR_UNDEFINED_ENTITY; + } + } + } + break; + case XML_ROLE_ENTITY_NOTATION_NAME: + if (declEntity) { + declEntity->notation = poolStoreString(&dtd.pool, encoding, s, next); + if (!declEntity->notation) + return XML_ERROR_NO_MEMORY; + poolFinish(&dtd.pool); + } + break; + case XML_ROLE_GENERAL_ENTITY_NAME: + { + const char *name = poolStoreString(&dtd.pool, encoding, s, next); + if (!name) + return XML_ERROR_NO_MEMORY; + declEntity = (ENTITY *)lookup(&dtd.generalEntities, name, sizeof(ENTITY)); + if (declEntity->name != name) { + poolDiscard(&dtd.pool); + declEntity = 0; + } + else + poolFinish(&dtd.pool); + } + break; + case XML_ROLE_PARAM_ENTITY_NAME: + { + const char *name = poolStoreString(&dtd.pool, encoding, s, next); + if (!name) + return XML_ERROR_NO_MEMORY; + declEntity = (ENTITY *)lookup(&dtd.paramEntities, name, sizeof(ENTITY)); + if (declEntity->name != name) { + 
poolDiscard(&dtd.pool); + declEntity = 0; + } + else + poolFinish(&dtd.pool); + } + break; + case XML_ROLE_ERROR: + errorPtr = s; + switch (tok) { + case XML_TOK_PARAM_ENTITY_REF: + return XML_ERROR_PARAM_ENTITY_REF; + case XML_TOK_XML_DECL: + return XML_ERROR_MISPLACED_XML_PI; + default: + return XML_ERROR_SYNTAX; + } + case XML_ROLE_GROUP_OPEN: + if (prologState.level >= dtd.groupSize) { + if (dtd.groupSize) + dtd.groupConnector = realloc(dtd.groupConnector, dtd.groupSize *= 2); + else + dtd.groupConnector = malloc(dtd.groupSize = 32); + if (!dtd.groupConnector) + return XML_ERROR_NO_MEMORY; + } + dtd.groupConnector[prologState.level] = 0; + break; + case XML_ROLE_GROUP_SEQUENCE: + if (dtd.groupConnector[prologState.level] == '|') { + *nextPtr = s; + return XML_ERROR_SYNTAX; + } + dtd.groupConnector[prologState.level] = ','; + break; + case XML_ROLE_GROUP_CHOICE: + if (dtd.groupConnector[prologState.level] == ',') { + *nextPtr = s; + return XML_ERROR_SYNTAX; + } + dtd.groupConnector[prologState.level] = '|'; + break; + case XML_ROLE_NONE: + switch (tok) { + case XML_TOK_PARAM_ENTITY_REF: + dtd.containsRef = 1; + break; + case XML_TOK_PI: + if (!reportProcessingInstruction(parser, encoding, s, next)) + return XML_ERROR_NO_MEMORY; + break; + } + break; + } + s = next; + } + /* not reached */ +} + +static +enum XML_Error epilogProcessor(XML_Parser parser, + const char *s, + const char *end, + const char **nextPtr) +{ + processor = epilogProcessor; + for (;;) { + const char *next; + int tok = XmlPrologTok(encoding, s, end, &next); + switch (tok) { + case XML_TOK_TRAILING_CR: + case XML_TOK_NONE: + if (nextPtr) + *nextPtr = end; + return XML_ERROR_NONE; + case XML_TOK_PROLOG_S: + case XML_TOK_COMMENT: + break; + case XML_TOK_PI: + if (!reportProcessingInstruction(parser, encoding, s, next)) + return XML_ERROR_NO_MEMORY; + break; + case XML_TOK_INVALID: + errorPtr = next; + return XML_ERROR_INVALID_TOKEN; + case XML_TOK_PARTIAL: + if (nextPtr) { + *nextPtr = s; + 
return XML_ERROR_NONE; + } + errorPtr = s; + return XML_ERROR_UNCLOSED_TOKEN; + case XML_TOK_PARTIAL_CHAR: + if (nextPtr) { + *nextPtr = s; + return XML_ERROR_NONE; + } + errorPtr = s; + return XML_ERROR_PARTIAL_CHAR; + default: + errorPtr = s; + return XML_ERROR_JUNK_AFTER_DOC_ELEMENT; + } + s = next; + } +} + +static enum XML_Error +checkAttributeUniqueness(XML_Parser parser, const ENCODING *enc, int nAtts) +{ + int i; + for (i = 1; i < nAtts; i++) { + int j; + for (j = 0; j < i; j++) { + if (XmlSameName(enc, atts[i].name, atts[j].name)) { + errorPtr = atts[i].name; + return XML_ERROR_DUPLICATE_ATTRIBUTE; + } + } + } + return XML_ERROR_NONE; +} + +static enum XML_Error +storeAttributeValue(XML_Parser parser, const ENCODING *enc, + const char *ptr, const char *end, + STRING_POOL *pool) +{ + const ENCODING *utf8 = XmlGetInternalEncoding(XML_UTF8_ENCODING); + for (;;) { + const char *next; + int tok = XmlAttributeValueTok(enc, ptr, end, &next); + switch (tok) { + case XML_TOK_NONE: + return XML_ERROR_NONE; + case XML_TOK_INVALID: + errorPtr = next; + return XML_ERROR_INVALID_TOKEN; + case XML_TOK_PARTIAL: + errorPtr = ptr; + return XML_ERROR_INVALID_TOKEN; + case XML_TOK_CHAR_REF: + if (XmlCharRefNumber(enc, ptr) < 0) { + errorPtr = ptr; + return XML_ERROR_BAD_CHAR_REF; + } + else { + char buf[XML_MAX_BYTES_PER_CHAR]; + int i; + int n = XmlCharRefNumber(enc, ptr); + if (n < 0) { + errorPtr = ptr; + return XML_ERROR_BAD_CHAR_REF; + } + n = XmlEncode(utf8, n, buf); + if (!n) { + errorPtr = ptr; + return XML_ERROR_BAD_CHAR_REF; + } + for (i = 0; i < n; i++) { + if (!poolAppendByte(pool, buf[i])) + return XML_ERROR_NO_MEMORY; + } + } + break; + case XML_TOK_DATA_CHARS: + if (!poolAppend(pool, enc, ptr, next)) + return XML_ERROR_NO_MEMORY; + break; + break; + case XML_TOK_TRAILING_CR: + next = ptr + enc->minBytesPerChar; + /* fall through */ + case XML_TOK_DATA_NEWLINE: + if (!poolAppendByte(pool, ' ')) + return XML_ERROR_NO_MEMORY; + break; + case XML_TOK_ENTITY_REF: + 
{ + const char *name = poolStoreString(&dtd.pool, enc, + ptr + enc->minBytesPerChar, + next - enc->minBytesPerChar); + ENTITY *entity; + if (!name) + return XML_ERROR_NO_MEMORY; + entity = (ENTITY *)lookup(&dtd.generalEntities, name, 0); + poolDiscard(&dtd.pool); + if (!entity) { + if (!dtd.containsRef) { + errorPtr = ptr; + return XML_ERROR_UNDEFINED_ENTITY; + } + } + else if (entity->open) { + errorPtr = ptr; + return XML_ERROR_RECURSIVE_ENTITY_REF; + } + else if (entity->notation) { + errorPtr = ptr; + return XML_ERROR_BINARY_ENTITY_REF; + } + else if (entity->magic) { + int i; + for (i = 0; i < entity->textLen; i++) + if (!poolAppendByte(pool, entity->textPtr[i])) + return XML_ERROR_NO_MEMORY; + } + else if (!entity->textPtr) { + errorPtr = ptr; + return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF; + } + else { + enum XML_Error result; + const char *textEnd = entity->textPtr + entity->textLen; + entity->open = 1; + result = storeAttributeValue(parser, utf8, entity->textPtr, textEnd, pool); + entity->open = 0; + if (result) { + errorPtr = ptr; + return result; + } + } + } + break; + default: + abort(); + } + ptr = next; + } + /* not reached */ +} + +static +enum XML_Error storeEntityValue(XML_Parser parser, + const char *entityTextPtr, + const char *entityTextEnd) +{ + const ENCODING *utf8 = XmlGetInternalEncoding(XML_UTF8_ENCODING); + STRING_POOL *pool = &(dtd.pool); + entityTextPtr += encoding->minBytesPerChar; + entityTextEnd -= encoding->minBytesPerChar; + for (;;) { + const char *next; + int tok = XmlEntityValueTok(encoding, entityTextPtr, entityTextEnd, &next); + switch (tok) { + case XML_TOK_PARAM_ENTITY_REF: + errorPtr = entityTextPtr; + return XML_ERROR_SYNTAX; + case XML_TOK_NONE: + if (declEntity) { + declEntity->textPtr = pool->start; + declEntity->textLen = pool->ptr - pool->start; + poolFinish(pool); + } + else + poolDiscard(pool); + return XML_ERROR_NONE; + case XML_TOK_ENTITY_REF: + case XML_TOK_DATA_CHARS: + if (!poolAppend(pool, encoding, 
entityTextPtr, next)) + return XML_ERROR_NO_MEMORY; + break; + case XML_TOK_TRAILING_CR: + next = entityTextPtr + encoding->minBytesPerChar; + /* fall through */ + case XML_TOK_DATA_NEWLINE: + if (pool->end == pool->ptr && !poolGrow(pool)) + return XML_ERROR_NO_MEMORY; + *(pool->ptr)++ = '\n'; + break; + case XML_TOK_CHAR_REF: + { + char buf[XML_MAX_BYTES_PER_CHAR]; + int i; + int n = XmlCharRefNumber(encoding, entityTextPtr); + if (n < 0) { + errorPtr = entityTextPtr; + return XML_ERROR_BAD_CHAR_REF; + } + n = XmlEncode(utf8, n, buf); + if (!n) { + errorPtr = entityTextPtr; + return XML_ERROR_BAD_CHAR_REF; + } + for (i = 0; i < n; i++) { + if (pool->end == pool->ptr && !poolGrow(pool)) + return XML_ERROR_NO_MEMORY; + *(pool->ptr)++ = buf[i]; + } + } + break; + case XML_TOK_PARTIAL: + errorPtr = entityTextPtr; + return XML_ERROR_INVALID_TOKEN; + case XML_TOK_INVALID: + errorPtr = next; + return XML_ERROR_INVALID_TOKEN; + default: + abort(); + } + entityTextPtr = next; + } + /* not reached */ +} + +static int +reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end) +{ + const char *target; + int targetLen; + const char *data; + if (!processingInstructionHandler) + return 1; + target = start + enc->minBytesPerChar * 2; + targetLen = XmlNameLength(enc, target); + target = poolStoreString(&tempPool, enc, target, target + targetLen); + if (!target) + return 0; + poolFinish(&tempPool); + data = XmlSkipS(enc, start + enc->minBytesPerChar*2 + targetLen); + data = poolStoreString(&tempPool, enc, + data, + end - enc->minBytesPerChar*2); + if (!data) + return 0; + processingInstructionHandler(userData, target, data); + poolClear(&tempPool); + return 1; +} + +static int dtdInit(DTD *p) +{ + static const char *names[] = { "lt", "amp", "gt", "quot", "apos" }; + static const char chars[] = { '<', '&', '>', '"', '\'' }; + int i; + + poolInit(&(p->pool)); + hashTableInit(&(p->generalEntities)); + for (i = 0; i < 5; i++) { + ENTITY 
*entity = (ENTITY *)lookup(&(p->generalEntities), names[i], sizeof(ENTITY)); + if (!entity) + return 0; + entity->textPtr = chars + i; + entity->textLen = 1; + entity->magic = 1; + } + hashTableInit(&(p->paramEntities)); + p->containsRef = 0; + p->groupSize = 0; + p->groupConnector = 0; + return 1; +} + +static void dtdDestroy(DTD *p) +{ + poolDestroy(&(p->pool)); + hashTableDestroy(&(p->generalEntities)); + hashTableDestroy(&(p->paramEntities)); + free(p->groupConnector); +} + +static +void poolInit(STRING_POOL *pool) +{ + pool->blocks = 0; + pool->freeBlocks = 0; + pool->start = 0; + pool->ptr = 0; + pool->end = 0; +} + +static +void poolClear(STRING_POOL *pool) +{ + if (!pool->freeBlocks) + pool->freeBlocks = pool->blocks; + else { + BLOCK *p = pool->blocks; + while (p) { + BLOCK *tem = p->next; + p->next = pool->freeBlocks; + pool->freeBlocks = p; + p = tem; + } + } + pool->blocks = 0; + pool->start = 0; + pool->ptr = 0; + pool->end = 0; +} + +static +void poolDestroy(STRING_POOL *pool) +{ + BLOCK *p = pool->blocks; + while (p) { + BLOCK *tem = p->next; + free(p); + p = tem; + } + pool->blocks = 0; + p = pool->freeBlocks; + while (p) { + BLOCK *tem = p->next; + free(p); + p = tem; + } + pool->freeBlocks = 0; + pool->ptr = 0; + pool->start = 0; + pool->end = 0; +} + +static +const char *poolAppend(STRING_POOL *pool, const ENCODING *enc, + const char *ptr, const char *end) +{ + for (;;) { + XmlConvert(enc, XML_UTF8_ENCODING, &ptr, end, &(pool->ptr), pool->end); + if (ptr == end) + break; + if (!poolGrow(pool)) + return 0; + } + return pool->start; +} + + +static +const char *poolStoreString(STRING_POOL *pool, const ENCODING *enc, + const char *ptr, const char *end) +{ + if (!poolAppend(pool, enc, ptr, end)) + return 0; + if (pool->ptr == pool->end && !poolGrow(pool)) + return 0; + *(pool->ptr)++ = 0; + return pool->start; +} + +static +int poolGrow(STRING_POOL *pool) +{ + if (pool->freeBlocks) { + if (pool->start == 0) { + pool->blocks = pool->freeBlocks; + 
pool->freeBlocks = pool->freeBlocks->next; + pool->blocks->next = 0; + pool->start = pool->blocks->s; + pool->end = pool->start + pool->blocks->size; + pool->ptr = pool->start; + return 1; + } + if (pool->end - pool->start < pool->freeBlocks->size) { + BLOCK *tem = pool->freeBlocks->next; + pool->freeBlocks->next = pool->blocks; + pool->blocks = pool->freeBlocks; + pool->freeBlocks = tem; + memcpy(pool->blocks->s, pool->start, pool->end - pool->start); + pool->ptr = pool->blocks->s + (pool->ptr - pool->start); + pool->start = pool->blocks->s; + pool->end = pool->start + pool->blocks->size; + return 1; + } + } + if (pool->blocks && pool->start == pool->blocks->s) { + size_t blockSize = (pool->end - pool->start)*2; + pool->blocks = realloc(pool->blocks, offsetof(BLOCK, s) + blockSize); + if (!pool->blocks) + return 0; + pool->blocks->size = blockSize; + pool->ptr = pool->blocks->s + (pool->ptr - pool->start); + pool->start = pool->blocks->s; + pool->end = pool->start + blockSize; + } + else { + BLOCK *tem; + size_t blockSize = pool->end - pool->start; + if (blockSize < INIT_BLOCK_SIZE) + blockSize = INIT_BLOCK_SIZE; + else + blockSize *= 2; + tem = malloc(offsetof(BLOCK, s) + blockSize); + if (!tem) + return 0; + tem->size = blockSize; + tem->next = pool->blocks; + pool->blocks = tem; + memcpy(tem->s, pool->start, pool->ptr - pool->start); + pool->ptr = tem->s + (pool->ptr - pool->start); + pool->start = tem->s; + pool->end = tem->s + blockSize; + } + return 1; +} diff --git a/expat/xmlparse/xmlparse.h b/expat/xmlparse/xmlparse.h new file mode 100755 index 00000000..21cac7d5 --- /dev/null +++ b/expat/xmlparse/xmlparse.h @@ -0,0 +1,113 @@ +#ifndef XmlParse_INCLUDED +#define XmlParse_INCLUDED 1 + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef XMLPARSEAPI +#define XMLPARSEAPI /* as nothing */ +#endif + +typedef void *XML_Parser; + +/* Constructs a new parser; encoding should be the name of the charset from +the Content-Type header if the Content-Type 
is text/xml, or null otherwise. */
+
+XML_Parser XMLPARSEAPI
+XML_ParserCreate(const char *encoding);
+
+/* Information passed to the handlers is UTF-8 encoded. */
+
+/* atts is an array of name/value pairs, terminated by NULL;
+   names and values are '\0' terminated. */
+
+typedef void (*XML_StartElementHandler)(void *userData,
+                                        const char *name,
+                                        const char **atts);
+
+typedef void (*XML_EndElementHandler)(void *userData,
+                                      const char *name);
+/* s is passed together with its length len.
+   NOTE(review): unlike name, target and data, nothing here says that s is
+   0 terminated; presumably it is not -- confirm. */
+typedef void (*XML_CharacterDataHandler)(void *userData,
+                                         const char *s,
+                                         size_t len);
+
+/* target and data are '\0' terminated */
+typedef void (*XML_ProcessingInstructionHandler)(void *userData,
+                                                 const char *target,
+                                                 const char *data);
+/* The three setters below each take the parser and the callback(s) to
+   install.  NOTE(review): their implementations are not visible here;
+   presumably a later call replaces the earlier handler -- confirm. */
+void XMLPARSEAPI
+XML_SetElementHandler(XML_Parser parser,
+                      XML_StartElementHandler start,
+                      XML_EndElementHandler end);
+
+void XMLPARSEAPI
+XML_SetCharacterDataHandler(XML_Parser parser,
+                            XML_CharacterDataHandler handler);
+
+void XMLPARSEAPI
+XML_SetProcessingInstructionHandler(XML_Parser parser,
+                                    XML_ProcessingInstructionHandler handler);
+
+/* This value is passed as the userData argument to callbacks. */
+void XMLPARSEAPI
+XML_SetUserData(XML_Parser parser, void *userData);
+
+/* Parses some input. Returns 0 if a fatal error is detected.
+The last call to XML_Parse must have isFinal true;
+len may be zero for this call (or any other). */
+int XMLPARSEAPI
+XML_Parse(XML_Parser parser, const char *s, size_t len, int isFinal);
+/* NOTE(review): XML_GetBuffer and XML_ParseBuffer look like a two-step
+   alternative to XML_Parse (obtain a buffer of len bytes, fill it, then
+   parse len bytes of it).  The implementation is not visible here --
+   confirm this, and who owns the returned buffer. */
+void XMLPARSEAPI *
+XML_GetBuffer(XML_Parser parser, size_t len);
+
+int XMLPARSEAPI
+XML_ParseBuffer(XML_Parser parser, size_t len, int isFinal);
+
+/* If XML_Parse or XML_ParseBuffer have returned 0, then the XML_GetError*
+functions return information about the error.
*/
+/* Error codes.  XML_ERROR_NONE is the first enumerator and therefore has
+   the value 0.
+   NOTE(review): XML_GetErrorCode and XML_ErrorString below are declared
+   with plain int; presumably those ints carry values of this enum --
+   confirm. */
+enum XML_Error {
+  XML_ERROR_NONE,
+  XML_ERROR_NO_MEMORY,
+  XML_ERROR_SYNTAX,
+  XML_ERROR_NO_ELEMENTS,
+  XML_ERROR_INVALID_TOKEN,
+  XML_ERROR_UNCLOSED_TOKEN,
+  XML_ERROR_PARTIAL_CHAR,
+  XML_ERROR_TAG_MISMATCH,
+  XML_ERROR_DUPLICATE_ATTRIBUTE,
+  XML_ERROR_JUNK_AFTER_DOC_ELEMENT,
+  XML_ERROR_PARAM_ENTITY_REF,
+  XML_ERROR_UNDEFINED_ENTITY,
+  XML_ERROR_RECURSIVE_ENTITY_REF,
+  XML_ERROR_ASYNC_ENTITY,
+  XML_ERROR_BAD_CHAR_REF,
+  XML_ERROR_BINARY_ENTITY_REF,
+  XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF,
+  XML_ERROR_MISPLACED_XML_PI,
+  XML_ERROR_UNKNOWN_ENCODING,
+  XML_ERROR_INCORRECT_ENCODING
+};
+/* The XML_GetError* accessors: error code, line number, column number and
+   byte index, each taking the parser.
+   NOTE(review): whether line and column numbers count from 0 or from 1 is
+   not visible here -- confirm in the implementation. */
+int XMLPARSEAPI XML_GetErrorCode(XML_Parser parser);
+int XMLPARSEAPI XML_GetErrorLineNumber(XML_Parser parser);
+int XMLPARSEAPI XML_GetErrorColumnNumber(XML_Parser parser);
+long XMLPARSEAPI XML_GetErrorByteIndex(XML_Parser parser);
+/* NOTE(review): presumably releases a parser obtained from
+   XML_ParserCreate; the implementation is not visible here -- confirm. */
+void XMLPARSEAPI
+XML_ParserFree(XML_Parser parser);
+/* NOTE(review): presumably maps an error code to a descriptive string;
+   the result for codes outside enum XML_Error is not visible here --
+   confirm. */
+const char XMLPARSEAPI *
+XML_ErrorString(int code);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* not XmlParse_INCLUDED */