From 3a10246912412b2cc6b6fd52dfc768b69a3a8b49 Mon Sep 17 00:00:00 2001 From: James Clark Date: Mon, 1 Jun 1998 07:28:31 +0000 Subject: [PATCH] New approach for handling predefined entities --- expat/xmlparse/xmlparse.c | 63 ++++++++++++++++++-------------------- expat/xmltok/xmltok.c | 1 + expat/xmltok/xmltok.h | 4 +++ expat/xmltok/xmltok_impl.c | 53 ++++++++++++++++++++++++++++++++ 4 files changed, 88 insertions(+), 33 deletions(-) diff --git a/expat/xmlparse/xmlparse.c b/expat/xmlparse/xmlparse.c index d1f9a3ae..604d63cc 100755 --- a/expat/xmlparse/xmlparse.c +++ b/expat/xmlparse/xmlparse.c @@ -78,7 +78,6 @@ typedef struct { const XML_Char *publicId; const XML_Char *notation; char open; - char magic; } ENTITY; typedef struct block { @@ -811,10 +810,19 @@ doContent(XML_Parser parser, return XML_ERROR_PARTIAL_CHAR; case XML_TOK_ENTITY_REF: { - const XML_Char *name = poolStoreString(&dtd.pool, enc, - s + enc->minBytesPerChar, - next - enc->minBytesPerChar); + const XML_Char *name; ENTITY *entity; + XML_Char ch = XmlPredefinedEntityName(enc, + s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (ch) { + if (characterDataHandler) + characterDataHandler(userData, &ch, 1); + break; + } + name = poolStoreString(&dtd.pool, enc, + s + enc->minBytesPerChar, + next - enc->minBytesPerChar); if (!name) return XML_ERROR_NO_MEMORY; entity = (ENTITY *)lookup(&dtd.generalEntities, name, 0); @@ -826,11 +834,6 @@ doContent(XML_Parser parser, } break; } - if (entity->magic) { - if (characterDataHandler) - characterDataHandler(userData, entity->textPtr, entity->textLen); - break; - } if (entity->open) { errorPtr = s; return XML_ERROR_RECURSIVE_ENTITY_REF; @@ -1438,7 +1441,12 @@ prologProcessor(XML_Parser parser, break; case XML_ROLE_GENERAL_ENTITY_NAME: { - const XML_Char *name = poolStoreString(&dtd.pool, encoding, s, next); + const XML_Char *name; + if (XmlPredefinedEntityName(encoding, s, next)) { + declEntity = 0; + break; + } + name = poolStoreString(&dtd.pool, 
encoding, s, next); if (!name) return XML_ERROR_NO_MEMORY; if (dtd.complete) { @@ -1694,10 +1702,19 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata, break; case XML_TOK_ENTITY_REF: { - const XML_Char *name = poolStoreString(&temp2Pool, enc, - ptr + enc->minBytesPerChar, - next - enc->minBytesPerChar); + const XML_Char *name; ENTITY *entity; + XML_Char ch = XmlPredefinedEntityName(enc, + ptr + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (ch) { + if (!poolAppendChar(pool, ch)) + return XML_ERROR_NO_MEMORY; + break; + } + name = poolStoreString(&temp2Pool, enc, + ptr + enc->minBytesPerChar, + next - enc->minBytesPerChar); if (!name) return XML_ERROR_NO_MEMORY; entity = (ENTITY *)lookup(&dtd.generalEntities, name, 0); @@ -1716,12 +1733,6 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata, errorPtr = ptr; return XML_ERROR_BINARY_ENTITY_REF; } - else if (entity->magic) { - int i; - for (i = 0; i < entity->textLen; i++) - if (!poolAppendChar(pool, entity->textPtr[i])) - return XML_ERROR_NO_MEMORY; - } else if (!entity->textPtr) { errorPtr = ptr; return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF; @@ -1986,20 +1997,8 @@ void normalizePublicId(XML_Char *publicId) static int dtdInit(DTD *p) { - static const XML_Char *names[] = { XML_T("lt"), XML_T("amp"), XML_T("gt"), XML_T("quot"), XML_T("apos") }; - static const XML_Char chars[] = { XML_T('<'), XML_T('&'), XML_T('>'), XML_T('"'), XML_T('\'') }; - int i; - poolInit(&(p->pool)); hashTableInit(&(p->generalEntities)); - for (i = 0; i < 5; i++) { - ENTITY *entity = (ENTITY *)lookup(&(p->generalEntities), names[i], sizeof(ENTITY)); - if (!entity) - return 0; - entity->textPtr = chars + i; - entity->textLen = 1; - entity->magic = 1; - } hashTableInit(&(p->elementTypes)); hashTableInit(&(p->attributeIds)); p->complete = 1; @@ -2101,8 +2100,6 @@ static int dtdCopy(DTD *newDtd, const DTD *oldDtd) const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter); if (!oldE) break; 
- if (oldE->magic) - continue; name = poolCopyString(&(newDtd->pool), oldE->name); if (!name) return 0; diff --git a/expat/xmltok/xmltok.c b/expat/xmltok/xmltok.c index 3c4fde65..c6630be2 100755 --- a/expat/xmltok/xmltok.c +++ b/expat/xmltok/xmltok.c @@ -31,6 +31,7 @@ Contributor(s): PREFIX(skipS), \ PREFIX(getAtts), \ PREFIX(charRefNumber), \ + PREFIX(predefinedEntityName), \ PREFIX(updatePosition), \ PREFIX(isPublicId) diff --git a/expat/xmltok/xmltok.h b/expat/xmltok/xmltok.h index 30c393b8..fb4d1c12 100755 --- a/expat/xmltok/xmltok.h +++ b/expat/xmltok/xmltok.h @@ -142,6 +142,7 @@ struct encoding { int (*getAtts)(const ENCODING *enc, const char *ptr, int attsMax, ATTRIBUTE *atts); int (*charRefNumber)(const ENCODING *enc, const char *ptr); + int (*predefinedEntityName)(const ENCODING *, const char *, const char *); void (*updatePosition)(const ENCODING *, const char *ptr, const char *end, @@ -225,6 +226,9 @@ the content of a literal that has already been returned by XmlTok. */ #define XmlCharRefNumber(enc, ptr) \ (((enc)->charRefNumber)(enc, ptr)) +#define XmlPredefinedEntityName(enc, ptr, end) \ + (((enc)->predefinedEntityName)(enc, ptr, end)) + #define XmlUpdatePosition(enc, ptr, end, pos) \ (((enc)->updatePosition)(enc, ptr, end, pos)) diff --git a/expat/xmltok/xmltok_impl.c b/expat/xmltok/xmltok_impl.c index 4f838325..030fbebf 100755 --- a/expat/xmltok/xmltok_impl.c +++ b/expat/xmltok/xmltok_impl.c @@ -1424,6 +1424,59 @@ int PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) return checkCharRefNumber(result); } +static +int PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, const char *end) +{ /* Map the entity name spanning ptr up to end onto the character it stands for: lt, gt, amp, quot and apos yield the less-than sign, greater-than sign, ampersand, double quote and apostrophe respectively; any other name yields 0. */ + switch (end - ptr) { /* dispatch on the byte length of the name; MINBPC is defined outside this patch, presumably the bytes per character of this encoding (TODO confirm) */ + case 2 * MINBPC: /* lt or gt: check the shared second character t, then pick by the first */ + if (CHAR_MATCHES(enc, ptr + MINBPC, 't')) { + switch (BYTE_TO_ASCII(enc, ptr)) { + case 'l': + return '<'; + case 'g': + return '>'; + } + } + break; + case 3 * MINBPC: /* only amp is matched at this length, one character at a time */ + if (CHAR_MATCHES(enc, ptr, 'a')) { + ptr += MINBPC; + if (CHAR_MATCHES(enc, ptr, 'm')) { + ptr += MINBPC; + if (CHAR_MATCHES(enc, 
ptr, 'p')) + return '&'; + } + } + break; + case 4 * MINBPC: /* quot or apos, selected by the first character */ + switch (BYTE_TO_ASCII(enc, ptr)) { + case 'q': + ptr += MINBPC; + if (CHAR_MATCHES(enc, ptr, 'u')) { + ptr += MINBPC; + if (CHAR_MATCHES(enc, ptr, 'o')) { + ptr += MINBPC; + if (CHAR_MATCHES(enc, ptr, 't')) + return '"'; + } + } + break; + case 'a': + ptr += MINBPC; + if (CHAR_MATCHES(enc, ptr, 'p')) { + ptr += MINBPC; + if (CHAR_MATCHES(enc, ptr, 'o')) { + ptr += MINBPC; + if (CHAR_MATCHES(enc, ptr, 's')) + return '\''; + } + } + break; + } + } + return 0; /* not one of the five predefined names */ +} + static int PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2) { -- 2.40.0