-
-/* FIXME
-
-Normalize tokenized attribute values.
-
-Do attribute defaulting.
-
-*/
-
+/* FIXME Normalize tokenized attribute values. */
#include "xmlparse.h"
#include "xmltok.h"
char *start;
} STRING_POOL;
+/* The byte before the name is a scratch byte used to determine whether
+an attribute has been specified. */
+typedef struct {
+ char *name;
+} ATTRIBUTE_ID;
+
+typedef struct {
+ const ATTRIBUTE_ID *id;
+ const char *value;
+} DEFAULT_ATTRIBUTE;
+
+typedef struct {
+ const char *name;
+ int nDefaultAtts;
+ int allocDefaultAtts;
+ DEFAULT_ATTRIBUTE *defaultAtts;
+} ELEMENT_TYPE;
+
typedef struct {
HASH_TABLE generalEntities;
HASH_TABLE paramEntities;
+ HASH_TABLE elementTypes;
+ HASH_TABLE attributeIds;
STRING_POOL pool;
int containsRef;
int standalone;
const char *s, const char *end,
const char **nextPtr,
const ENCODING **enc);
-static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *, const char *s);
+static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *, const char *tagName, const char *s);
+static int
+addDefaultAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, const char *value);
static enum XML_Error
storeAttributeValue(XML_Parser parser, const ENCODING *, const char *, const char *,
STRING_POOL *);
-static enum XML_Error
-checkAttributeUniqueness(XML_Parser parser, const ENCODING *enc, int nAtts);
+static ATTRIBUTE_ID *
+getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
static enum XML_Error
storeEntityValue(XML_Parser parser, const char *start, const char *end);
static int
static
const char *pushTag(XML_Parser parser, const ENCODING *enc, const char *rawName);
-
static int dtdInit(DTD *);
static void dtdDestroy(DTD *);
static void poolInit(STRING_POOL *);
const char *errorPtr;
int tagLevel;
ENTITY *declEntity;
+ ELEMENT_TYPE *declElementType;
+ ATTRIBUTE_ID *declAttributeId;
DTD dtd;
char *tagStack;
char *tagStackPtr;
#define bufferLim (((Parser *)parser)->bufferLim)
#define dtd (((Parser *)parser)->dtd)
#define declEntity (((Parser *)parser)->declEntity)
+#define declElementType (((Parser *)parser)->declElementType)
+#define declAttributeId (((Parser *)parser)->declAttributeId)
#define tagStackEnd (((Parser *)parser)->tagStackEnd)
#define tagStackPtr (((Parser *)parser)->tagStackPtr)
#define tagStack (((Parser *)parser)->tagStack)
bufferEnd = 0;
bufferEndByteIndex = 0;
bufferLim = 0;
+ declElementType = 0;
+ declAttributeId = 0;
+ declEntity = 0;
memset(&position, 0, sizeof(POSITION));
errorCode = XML_ERROR_NONE;
errorByteIndex = 0;
}
else {
char *newBuf;
- size_t bufferSize = bufferLim - bufferPtr;
+ size_t bufferSize = bufferLim - bufferPtr;
+ if (bufferSize == 0)
+ bufferSize = 1024;
do {
bufferSize *= 2;
} while (bufferSize < neededSize);
static const char *nullPtr = 0;
const ENCODING *utf8 = XmlGetInternalEncoding(XML_UTF8_ENCODING);
for (;;) {
- const char **appAtts;
const char *next;
int tok = XmlContentTok(enc, s, end, &next);
switch (tok) {
case XML_TOK_START_TAG_WITH_ATTS:
{
const char *name;
- enum XML_Error result = storeAtts(parser, enc, s);
- if (result)
- return result;
name = pushTag(parser, enc, s + enc->minBytesPerChar);
if (!name)
return XML_ERROR_NO_MEMORY;
++tagLevel;
if (startElementHandler) {
+ enum XML_Error result = storeAtts(parser, enc, name, s);
+ if (result)
+ return result;
startElementHandler(userData, name, (const char **)atts);
poolClear(&tempPool);
}
+ else {
+ enum XML_Error result = storeAtts(parser, enc, 0, s);
+ if (result)
+ return result;
+ }
break;
}
case XML_TOK_START_TAG_NO_ATTS:
++tagLevel;
if (!name)
return XML_ERROR_NO_MEMORY;
- if (startElementHandler)
- startElementHandler(userData, name, &nullPtr);
+ if (startElementHandler) {
+ enum XML_Error result = storeAtts(parser, enc, name, s);
+ if (result)
+ return result;
+ startElementHandler(userData, name, (const char **)atts);
+ }
break;
}
case XML_TOK_EMPTY_ELEMENT_WITH_ATTS:
- {
- enum XML_Error result = storeAtts(parser, enc, s);
+ if (!startElementHandler) {
+ enum XML_Error result = storeAtts(parser, enc, 0, s);
if (result)
return result;
- appAtts = (const char **)atts;
- goto emptyElement;
}
+ /* fall through */
case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
- appAtts = &nullPtr;
- emptyElement:
if (startElementHandler || endElementHandler) {
- const char *nameStart = s + enc->minBytesPerChar;
- const char *name = poolStoreString(&tempPool, enc, nameStart,
- nameStart
- + XmlNameLength(enc, nameStart));
+ const char *rawName = s + enc->minBytesPerChar;
+ const char *name = poolStoreString(&tempPool, enc, rawName,
+ rawName
+ + XmlNameLength(enc, rawName));
if (!name)
return XML_ERROR_NO_MEMORY;
- if (startElementHandler)
- startElementHandler(userData, name, appAtts);
+ poolFinish(&tempPool);
+ if (startElementHandler) {
+ enum XML_Error result = storeAtts(parser, enc, name, s);
+ if (result)
+ return result;
+ startElementHandler(userData, name, (const char **)atts);
+ }
if (endElementHandler)
endElementHandler(userData, name);
poolClear(&tempPool);
char *newStack;
size_t newSize;
XmlConvert(enc, XML_UTF8_ENCODING,
- &rawName, rawNameEnd,
- &tagStackPtr, tagStackEnd - 1);
- if (rawName == rawNameEnd)
- break;
- newSize = (tagStackEnd - tagStack) >> 1;
- newStack = realloc(tagStack, newSize);
+ &rawName, rawNameEnd,
+ &tagStackPtr, tagStackEnd - 1);
+ if (rawName == rawNameEnd)
+ break;
+ newSize = (tagStackEnd - tagStack) >> 1;
+ newStack = realloc(tagStack, newSize);
if (!newStack)
return 0;
tagStackEnd = newStack + newSize;
tagStackPtr = newStack + (tagStackPtr - tagStack);
name = newStack + (name - tagStack);
tagStack = newStack;
- }
+ }
*tagStackPtr++ = '\0';
return name;
}
-static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc, const char *s)
+/* If tagName is non-null, build a real list of attributes,
+otherwise just check the attributes for well-formedness. */
+
+static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc,
+ const char *tagName, const char *s)
{
- const char **appAtts = 0;
+ ELEMENT_TYPE *elementType = 0;
+ int nDefaultAtts = 0;
+ const char **appAtts = (const char **)atts;
int i;
- int n = XmlGetAttributes(enc, s, attsSize, atts);
- if (n > attsSize) {
+ int n;
+
+ if (tagName) {
+ elementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, tagName, 0);
+ if (elementType)
+ nDefaultAtts = elementType->nDefaultAtts;
+ }
+
+ n = XmlGetAttributes(enc, s, attsSize, atts);
+ if (n + nDefaultAtts > attsSize) {
attsSize = 2*n;
atts = realloc((void *)atts, attsSize * sizeof(ATTRIBUTE));
if (!atts)
return XML_ERROR_NO_MEMORY;
- XmlGetAttributes(enc, s, n, atts);
- }
- if (n > 1) {
- enum XML_Error result = checkAttributeUniqueness(parser, enc, n);
- if (result)
- return result;
+ if (n > attsSize)
+ XmlGetAttributes(enc, s, n, atts);
}
- if (startElementHandler)
- appAtts = (const char **)atts;
for (i = 0; i < n; i++) {
- if (appAtts) {
- appAtts[i << 1] = poolStoreString(&tempPool, enc,
- atts[i].name,
- atts[i].name
- + XmlNameLength(enc, atts[i].name));
- if (appAtts[i << 1] == 0)
- return XML_ERROR_NO_MEMORY;
- poolFinish(&tempPool);
+ ATTRIBUTE_ID *attId = getAttributeId(parser, enc, atts[i].name,
+ atts[i].name
+ + XmlNameLength(enc, atts[i].name));
+ if (!attId)
+ return XML_ERROR_NO_MEMORY;
+ if (attId->name[-1]) {
+ errorPtr = atts[i].name;
+ return XML_ERROR_DUPLICATE_ATTRIBUTE;
}
+ attId->name[-1] = 1;
+ appAtts[i << 1] = attId->name;
if (!atts[i].normalized) {
enum XML_Error result
= storeAttributeValue(parser, enc,
return result;
if (!poolAppendByte(&tempPool, '\0'))
return XML_ERROR_NO_MEMORY;
- if (appAtts) {
+ if (tagName) {
appAtts[(i << 1) + 1] = poolStart(&tempPool);
poolFinish(&tempPool);
}
else
poolDiscard(&tempPool);
}
- else if (appAtts) {
+ else if (tagName) {
appAtts[(i << 1) + 1] = poolStoreString(&tempPool, enc, atts[i].valuePtr, atts[i].valueEnd);
if (appAtts[(i << 1) + 1] == 0)
return XML_ERROR_NO_MEMORY;
poolFinish(&tempPool);
}
}
- if (appAtts)
- appAtts[n << 1] = 0;
+ if (tagName) {
+ int j;
+ for (j = 0; j < nDefaultAtts; j++) {
+ const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + j;
+ if (!da->id->name[-1]) {
+ da->id->name[-1] = 1;
+ appAtts[i << 1] = da->id->name;
+ appAtts[(i << 1) + 1] = da->value;
+ i++;
+ }
+ }
+ appAtts[i << 1] = 0;
+ }
+ for (i = 0; i < n; i++)
+ ((char *)appAtts[i << 1])[-1] = 0;
return XML_ERROR_NONE;
}
case XML_ROLE_INSTANCE_START:
processor = contentProcessor;
return contentProcessor(parser, s, end, nextPtr);
+ case XML_ROLE_ATTLIST_ELEMENT_NAME:
+ {
+ const char *name = poolStoreString(&dtd.pool, encoding, s, next);
+ if (!name)
+ return XML_ERROR_NO_MEMORY;
+ declElementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, name, sizeof(ELEMENT_TYPE));
+ if (!declElementType)
+ return XML_ERROR_NO_MEMORY;
+ if (declElementType->name != name)
+ poolDiscard(&dtd.pool);
+ else
+ poolFinish(&dtd.pool);
+ break;
+ }
+ case XML_ROLE_ATTRIBUTE_NAME:
+ declAttributeId = getAttributeId(parser, encoding, s, next);
+ if (!declAttributeId)
+ return XML_ERROR_NO_MEMORY;
+ break;
case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
{
&dtd.pool);
if (result)
return result;
+ if (!poolAppendByte(&dtd.pool, 0))
+ return XML_ERROR_NO_MEMORY;
+ if (!addDefaultAttribute(declElementType, declAttributeId, poolStart(&dtd.pool)))
+ return XML_ERROR_NO_MEMORY;
poolFinish(&dtd.pool);
break;
}
if (!name)
return XML_ERROR_NO_MEMORY;
declEntity = (ENTITY *)lookup(&dtd.generalEntities, name, sizeof(ENTITY));
+ if (!declEntity)
+ return XML_ERROR_NO_MEMORY;
if (declEntity->name != name) {
poolDiscard(&dtd.pool);
declEntity = 0;
if (!name)
return XML_ERROR_NO_MEMORY;
declEntity = (ENTITY *)lookup(&dtd.paramEntities, name, sizeof(ENTITY));
+ if (!declEntity)
+ return XML_ERROR_NO_MEMORY;
if (declEntity->name != name) {
poolDiscard(&dtd.pool);
declEntity = 0;
}
}
-static enum XML_Error
-checkAttributeUniqueness(XML_Parser parser, const ENCODING *enc, int nAtts)
-{
- int i;
- for (i = 1; i < nAtts; i++) {
- int j;
- for (j = 0; j < i; j++) {
- if (XmlSameName(enc, atts[i].name, atts[j].name)) {
- errorPtr = atts[i].name;
- return XML_ERROR_DUPLICATE_ATTRIBUTE;
- }
- }
- }
- return XML_ERROR_NONE;
-}
-
static enum XML_Error
storeAttributeValue(XML_Parser parser, const ENCODING *enc,
const char *ptr, const char *end,
reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end)
{
const char *target;
- int targetLen;
const char *data;
if (!processingInstructionHandler)
return 1;
target = start + enc->minBytesPerChar * 2;
- targetLen = XmlNameLength(enc, target);
- target = poolStoreString(&tempPool, enc, target, target + targetLen);
+ data = target + XmlNameLength(enc, target);
+ target = poolStoreString(&tempPool, enc, target, data);
if (!target)
return 0;
poolFinish(&tempPool);
- data = XmlSkipS(enc, start + enc->minBytesPerChar*2 + targetLen);
data = poolStoreString(&tempPool, enc,
- data,
+ XmlSkipS(enc, data),
end - enc->minBytesPerChar*2);
if (!data)
return 0;
return 1;
}
+static int
+addDefaultAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, const char *value)
+{
+ DEFAULT_ATTRIBUTE *att;
+ if (type->nDefaultAtts == type->allocDefaultAtts) {
+ if (type->allocDefaultAtts == 0)
+ type->allocDefaultAtts = 8;
+ else
+ type->allocDefaultAtts *= 2;
+ type->defaultAtts = realloc(type->defaultAtts, type->allocDefaultAtts);
+ if (!type->defaultAtts)
+ return 0;
+ }
+ att = type->defaultAtts + type->nDefaultAtts;
+ att->id = attId;
+ att->value = value;
+ type->nDefaultAtts += 1;
+ return 1;
+}
+
+static ATTRIBUTE_ID *
+getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end)
+{
+ ATTRIBUTE_ID *id;
+ const char *name;
+ if (!poolAppendByte(&dtd.pool, 0))
+ return 0;
+ name = poolStoreString(&dtd.pool, enc, start, end);
+ if (!name)
+ return 0;
+ ++name;
+ id = (ATTRIBUTE_ID *)lookup(&dtd.attributeIds, name, sizeof(ATTRIBUTE_ID));
+ if (!id)
+ return 0;
+ if (id->name != name)
+ poolDiscard(&dtd.pool);
+ else
+ poolFinish(&dtd.pool);
+ return id;
+}
+
static int dtdInit(DTD *p)
{
static const char *names[] = { "lt", "amp", "gt", "quot", "apos" };
entity->magic = 1;
}
hashTableInit(&(p->paramEntities));
+ hashTableInit(&(p->elementTypes));
+ hashTableInit(&(p->attributeIds));
p->containsRef = 0;
p->groupSize = 0;
p->groupConnector = 0;
poolDestroy(&(p->pool));
hashTableDestroy(&(p->generalEntities));
hashTableDestroy(&(p->paramEntities));
+ hashTableDestroy(&(p->elementTypes));
+ hashTableDestroy(&(p->attributeIds));
free(p->groupConnector);
}