1 /* ====================================================================
2 * The Apache Software License, Version 1.1
4 * Copyright (c) 2000 The Apache Software Foundation. All rights
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
19 * 3. The end-user documentation included with the redistribution,
20 * if any, must include the following acknowledgment:
21 * "This product includes software developed by the
22 * Apache Software Foundation (http://www.apache.org/)."
23 * Alternately, this acknowledgment may appear in the software itself,
24 * if and wherever such third-party acknowledgments normally appear.
26 * 4. The names "Apache" and "Apache Software Foundation" must
27 * not be used to endorse or promote products derived from this
28 * software without prior written permission. For written
29 * permission, please contact apache@apache.org.
31 * 5. Products derived from this software may not be called "Apache",
32 * nor may "Apache" appear in their name, without prior written
33 * permission of the Apache Software Foundation.
35 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
36 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
37 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
38 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
41 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
42 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
43 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
44 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
45 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47 * ====================================================================
49 * This software consists of voluntary contributions made by many
50 * individuals on behalf of the Apache Software Foundation. For more
51 * information on the Apache Software Foundation, please see
52 * <http://www.apache.org/>.
56 ** DAV extension module for Apache 2.0.*
57 ** - XML parser for the body of a request
60 /* James Clark's Expat parser */
64 #include "http_protocol.h"
66 #include "http_core.h"
/* Line terminator appended to the text built by ap_xml_empty_elem(). */
71 #define DEBUG_CR "\r\n"
/*
** Size in bytes of the working buffer that ap_xml_parse_input() allocates,
** and the maximum it requests from ap_get_client_block() per call.
*/
73 #define AP_XML_READ_BLOCKSIZE 2048 /* used for reading input blocks */
75 /* errors related to namespace processing */
/*
** Returned by find_prefix() when no enclosing scope declares a non-empty
** prefix; start_handler() copies it into ap_xml_ctx.error.
*/
76 #define AP_XML_NS_ERROR_UNKNOWN_PREFIX (AP_XML_NS_ERROR_BASE)
/*
** Test for a name that begins with the reserved prefix [Xx][Mm][Ll].
**
** "name" must designate a NUL-terminated string.  The comparisons
** short-circuit left to right, so a string shorter than three characters
** is never read past its terminator.
**
** The argument is parenthesized at every use so that pointer expressions
** such as "p + 1" are indexed as a whole.  It is still evaluated several
** times, so it must not have side effects.
*/
#define AP_XML_NS_IS_RESERVED(name) \
    ( ((name)[0] == 'X' || (name)[0] == 'x') && \
      ((name)[1] == 'M' || (name)[1] == 'm') && \
      ((name)[2] == 'L' || (name)[2] == 'l') )
/*
** Parsing context.  ap_xml_parse_input() registers a pointer to one of
** these as the Expat user data, and the start/end/cdata handlers receive
** it back as their "userdata" argument.
*/
85 /* context for parsing */
86 typedef struct ap_xml_ctx {
87 ap_xml_doc *doc; /* the doc we're parsing */
88 ap_pool_t *p; /* the pool we allocate from */
/* cur_elem is NULL until the root start tag; end_handler() resets it to the parent */
89 ap_xml_elem *cur_elem; /* innermost element whose end tag has not been seen yet */
91 int error; /* an error has occurred */
92 /* errors may be AP_XML_NS_ERROR_* or other private errors which will
93 be defined here (none yet) */
/*
** One entry is created by start_handler() for each xmlns / xmlns:foo
** attribute and pushed onto the front of the owning element's ns_scope
** list.  find_prefix() searches these lists from the current element
** upwards through its ancestors.
*/
97 /* struct for scoping namespace declarations */
98 typedef struct ap_xml_ns_scope {
99 const char *prefix; /* prefix used for this ns ("" for the default namespace) */
100 int ns; /* index into namespace table (value returned by ap_xml_insert_uri) */
101 int emptyURI; /* the namespace URI is the empty string */
102 struct ap_xml_ns_scope *next; /* next scoped namespace */
/*
** find_prefix: resolve a namespace prefix against the scopes in effect.
**
** ctx    - parsing context; the search starts at ctx->cur_elem.
** prefix - NUL-terminated prefix to look up; "" means "no prefix given".
**
** Each element from the current one up through its ancestors is visited,
** and its ns_scope list is compared against "prefix" with strcmp().
**
** Returns:
**   AP_XML_NS_NONE                  the matching scope has an empty URI, or
**                                   "prefix" is "" and nothing matched
**   AP_XML_NS_ERROR_UNKNOWN_PREFIX  a non-empty prefix matched nothing
**
** NOTE(review): for a match with a non-empty URI this presumably returns
** ns_scope->ns -- confirm against the full function body.
*/
106 /* return namespace table index for a given prefix */
107 static int find_prefix(ap_xml_ctx *ctx, const char *prefix)
109 ap_xml_elem *elem = ctx->cur_elem;
112 ** Walk up the tree, looking for a namespace scope that defines this
115 for (; elem; elem = elem->parent) {
116 ap_xml_ns_scope *ns_scope = elem->ns_scope;
118 for (ns_scope = elem->ns_scope; ns_scope; ns_scope = ns_scope->next) {
119 if (strcmp(prefix, ns_scope->prefix) == 0) {
120 if (ns_scope->emptyURI) {
122 ** It is possible to set the default namespace to an
123 ** empty URI string; this resets the default namespace
124 ** to mean "no namespace." We just found the prefix
125 ** refers to an empty URI, so return "no namespace."
127 return AP_XML_NS_NONE;
136 * If the prefix is empty (""), this means that a prefix was not
137 * specified in the element/attribute. The search that was performed
138 * just above did not locate a default namespace URI (which is stored
139 * into ns_scope with an empty prefix). This means the element/attribute
140 * has "no namespace". We have a reserved value for this.
142 if (*prefix == '\0') {
143 return AP_XML_NS_NONE;
147 return AP_XML_NS_ERROR_UNKNOWN_PREFIX;
/*
** start_handler: Expat start-tag callback.
**
** userdata - the ap_xml_ctx registered with XML_SetUserData()
** name     - element name as delivered by Expat (may contain "prefix:")
** attrs    - name/value string pairs, consumed two at a time
**
** Steps performed, in order:
**   1. Allocate a zeroed ap_xml_elem from ctx->p and copy the name.
**   2. Copy every attribute name and value into the pool; each new
**      attribute is pushed on the front of elem->attr, so the list ends
**      up in reverse document order.
**   3. Link the element in as the document root, or as the last child of
**      ctx->cur_elem, and make it the new ctx->cur_elem.
**   4. Turn xmlns declarations into ap_xml_ns_scope entries (URI quoted
**      and registered through ap_xml_insert_uri) and unlink them from
**      the attribute list.
**   5. Move an "xml:lang" attribute into elem->lang (quoted) and unlink
**      it; when absent, share the parent's lang pointer.
**   6. Resolve the namespace of the element and of each remaining
**      attribute with find_prefix(); a name starting with [Xx][Mm][Ll]
**      is placed in AP_XML_NS_NONE.  A resolution error is stored in
**      ctx->error.
**
** All storage comes from ctx->p; nothing is freed here.
*/
150 static void start_handler(void *userdata, const char *name, const char **attrs)
152 ap_xml_ctx *ctx = userdata;
160 /* punt once we find an error */
164 elem = ap_pcalloc(ctx->p, sizeof(*elem));
166 /* prep the element */
167 elem->name = elem_name = ap_pstrdup(ctx->p, name);
169 /* fill in the attributes (note: ends up in reverse order) */
171 attr = ap_palloc(ctx->p, sizeof(*attr));
172 attr->name = ap_pstrdup(ctx->p, *attrs++);
173 attr->value = ap_pstrdup(ctx->p, *attrs++);
174 attr->next = elem->attr;
178 /* hook the element into the tree */
179 if (ctx->cur_elem == NULL) {
180 /* no current element; this also becomes the root */
181 ctx->cur_elem = ctx->doc->root = elem;
184 /* this element appeared within the current elem */
185 elem->parent = ctx->cur_elem;
187 /* set up the child/sibling links */
188 if (elem->parent->last_child == NULL) {
189 /* no first child either */
190 elem->parent->first_child = elem->parent->last_child = elem;
193 /* hook onto the end of the parent's children */
194 elem->parent->last_child->next = elem;
195 elem->parent->last_child = elem;
198 /* this element is now the current element */
199 ctx->cur_elem = elem;
202 /* scan the attributes for namespace declarations */
203 for (prev = NULL, attr = elem->attr;
/* only the first five characters are compared; "prefix" is whatever follows them */
206 if (strncmp(attr->name, "xmlns", 5) == 0) {
207 const char *prefix = &attr->name[5];
208 ap_xml_ns_scope *ns_scope;
210 /* test for xmlns:foo= form and xmlns= form */
213 else if (*prefix != '\0') {
214 /* advance "prev" since "attr" is still present */
219 /* quote the URI before we ever start working with it */
220 quoted = ap_xml_quote_string(ctx->p, attr->value, 1);
222 /* build and insert the new scope */
223 ns_scope = ap_pcalloc(ctx->p, sizeof(*ns_scope));
/* "prefix" points into the pool copy of the attribute name made above */
224 ns_scope->prefix = prefix;
225 ns_scope->ns = ap_xml_insert_uri(ctx->doc->namespaces, quoted);
226 ns_scope->emptyURI = *quoted == '\0';
227 ns_scope->next = elem->ns_scope;
228 elem->ns_scope = ns_scope;
230 /* remove this attribute from the element */
232 elem->attr = attr->next;
234 prev->next = attr->next;
236 /* Note: prev will not be advanced since we just removed "attr" */
238 else if (strcmp(attr->name, "xml:lang") == 0) {
239 /* save away the language (in quoted form) */
240 elem->lang = ap_xml_quote_string(ctx->p, attr->value, 1);
242 /* remove this attribute from the element */
244 elem->attr = attr->next;
246 prev->next = attr->next;
248 /* Note: prev will not be advanced since we just removed "attr" */
251 /* advance "prev" since "attr" is still present */
257 ** If an xml:lang attribute didn't exist (lang==NULL), then copy the
258 ** language from the parent element (if present).
260 ** NOTE: elem_size() *depends* upon this pointer equality.
262 if (elem->lang == NULL && elem->parent != NULL)
263 elem->lang = elem->parent->lang;
265 /* adjust the element's namespace */
266 colon = ap_strchr(elem_name, ':');
269 * The element is using the default namespace, which will always
270 * be found. Either it will be "no namespace", or a default
271 * namespace URI has been specified at some point.
273 elem->ns = find_prefix(ctx, "");
275 else if (AP_XML_NS_IS_RESERVED(elem->name)) {
276 elem->ns = AP_XML_NS_NONE;
280 elem->ns = find_prefix(ctx, elem->name);
/* keep only the local part of the name, i.e. what follows the colon */
281 elem->name = colon + 1;
283 if (AP_XML_NS_IS_ERROR(elem->ns)) {
284 ctx->error = elem->ns;
289 /* adjust all remaining attributes' namespaces */
290 for (attr = elem->attr; attr; attr = attr->next) {
292 * ap_xml_attr defines this as "const" but we dup'd it, so we
293 * know that we can change it. a bit hacky, but the existing
294 * structure def is best.
296 char *attr_name = (char *)attr->name;
298 colon = ap_strchr(attr_name, ':');
301 * Attributes do NOT use the default namespace. Therefore,
302 * we place them into the "no namespace" category.
304 attr->ns = AP_XML_NS_NONE;
306 else if (AP_XML_NS_IS_RESERVED(attr->name)) {
307 attr->ns = AP_XML_NS_NONE;
311 attr->ns = find_prefix(ctx, attr->name);
312 attr->name = colon + 1;
314 if (AP_XML_NS_IS_ERROR(attr->ns)) {
315 ctx->error = attr->ns;
/*
** end_handler: Expat end-tag callback.
**
** userdata - the ap_xml_ctx registered with XML_SetUserData()
** name     - element name from Expat; not referenced by the statements
**            shown here
**
** Closes the current element by moving ctx->cur_elem to its parent, which
** leaves it NULL once the root element has been closed.
*/
322 static void end_handler(void *userdata, const char *name)
324 ap_xml_ctx *ctx = userdata;
326 /* punt once we find an error */
330 /* pop up one level */
331 ctx->cur_elem = ctx->cur_elem->parent;
/*
** cdata_handler: Expat character-data callback.
**
** userdata - the ap_xml_ctx registered with XML_SetUserData()
** data     - character data; only "len" bytes are copied
** len      - number of bytes available at "data"
**
** The data is copied into ctx->p with ap_pstrndup() and appended to one of
** two text lists on the current element:
**   - elem->first_cdata when the element has no child elements yet;
**   - the following_cdata list of the last child otherwise.
*/
334 static void cdata_handler(void *userdata, const char *data, int len)
336 ap_xml_ctx *ctx = userdata;
341 /* punt once we find an error */
345 elem = ctx->cur_elem;
346 s = ap_pstrndup(ctx->p, data, len);
348 if (elem->last_child == NULL) {
349 /* no children yet. this cdata follows the start tag */
350 hdr = &elem->first_cdata;
353 /* child elements exist. this cdata follows the last child. */
354 hdr = &elem->last_child->following_cdata;
357 ap_text_append(ctx->p, hdr, s);
/*
** ap_xml_parse_input: read the request body and parse it as XML.
**
** r    - the request whose body is read
** pdoc - output parameter for the parsed document
**
** Outline of the visible steps:
**   - ap_setup_client_block() is called with REQUEST_CHUNKED_DECHUNK; a
**     result other than OK is handled first.
**   - r->remaining == 0 is tested as a special case.
**   - The document and its namespace array are allocated from the context
**     pool, and "DAV:" is inserted into the array before any parsed URI.
**   - An Expat parser is created and wired to start_handler, end_handler
**     and cdata_handler, with the local context as user data.
**   - The body is read in AP_XML_READ_BLOCKSIZE pieces and fed to
**     XML_Parse(); a running total is compared with the configured XML
**     body limit, where a limit of 0 disables the check.
**   - A final XML_Parse() call with isFinal set ends the parse, and the
**     parser is freed on every visible exit path.
**
** Namespace errors, XML syntax errors and read errors are logged, and the
** visible error exits return HTTP_BAD_REQUEST.
**
** NOTE(review): the success return value and the assignment to *pdoc are
** not among the statements shown here -- confirm before relying on them.
*/
360 API_EXPORT(int) ap_xml_parse_input(request_rec * r, ap_xml_doc **pdoc)
367 if ((result = ap_setup_client_block(r, REQUEST_CHUNKED_DECHUNK)) != OK)
370 if (r->remaining == 0) {
376 ctx.doc = ap_pcalloc(ctx.p, sizeof(*ctx.doc));
378 ctx.doc->namespaces = ap_make_array(ctx.p, 5, sizeof(const char *));
379 ap_xml_insert_uri(ctx.doc->namespaces, "DAV:");
/*
** NOTE(review): in HTTP the character set is normally carried by the
** charset parameter of Content-Type; Content-Encoding names codings such
** as gzip.  Confirm which header the remark below intends.
*/
381 /* ### we should get the encoding from Content-Encoding */
382 parser = XML_ParserCreate(NULL);
383 if (parser == NULL) {
384 /* ### anything better to do? */
/*
** NOTE(review): this is the only diagnostic in the function that is
** written to stderr instead of going through ap_log_rerror().
*/
385 fprintf(stderr, "Ouch! XML_ParserCreate() failed!\n");
389 XML_SetUserData(parser, (void *) &ctx);
390 XML_SetElementHandler(parser, start_handler, end_handler);
391 XML_SetCharacterDataHandler(parser, cdata_handler);
393 if (ap_should_client_block(r)) {
398 size_t total_read = 0;
399 size_t limit_xml_body = ap_get_limit_xml_body(r);
401 /* allocate our working buffer */
402 buffer = ap_palloc(r->pool, AP_XML_READ_BLOCKSIZE);
404 /* read the body, stuffing it into the parser */
405 while ((len = ap_get_client_block(r, buffer, AP_XML_READ_BLOCKSIZE)) > 0) {
/* a limit of 0 skips the size check */
407 if (limit_xml_body && total_read > limit_xml_body) {
408 ap_log_rerror(APLOG_MARK, APLOG_ERR | APLOG_NOERRNO, 0, r,
409 "XML request body is larger than the configured "
410 "limit of %lu", (unsigned long)limit_xml_body);
414 rv = XML_Parse(parser, buffer, len, 0);
419 /* ap_get_client_block() has logged an error */
423 /* tell the parser that we're done */
/* zero-length final call: isFinal is 1 and no bytes are read from &end */
424 rv = XML_Parse(parser, &end, 0, 1);
429 XML_ParserFree(parser);
433 case AP_XML_NS_ERROR_UNKNOWN_PREFIX:
434 ap_log_rerror(APLOG_MARK, APLOG_ERR | APLOG_NOERRNO, 0, r,
435 "An undefined namespace prefix was used.");
439 ap_log_rerror(APLOG_MARK, APLOG_ERR | APLOG_NOERRNO, 0, r,
440 "There was an error within the XML request body.");
444 /* Apache will supply a default error, plus the error log above. */
445 return HTTP_BAD_REQUEST;
448 /* ### assert: ctx.cur_elem == NULL */
456 enum XML_Error err = XML_GetErrorCode(parser);
458 /* ### fix this error message (default vs special) */
459 ap_log_rerror(APLOG_MARK, APLOG_ERR | APLOG_NOERRNO, 0, r,
460 "XML parser error code: %s (%d).",
461 XML_ErrorString(err), err);
463 XML_ParserFree(parser);
465 /* Apache will supply a default error, plus the error log above. */
466 return HTTP_BAD_REQUEST;
470 XML_ParserFree(parser);
472 /* Apache will supply a default error, plus whatever was logged. */
473 return HTTP_BAD_REQUEST;
/*
** ap_text_append: add one text node to the end of a text list.
**
** p   - pool from which the new ap_text node is allocated
** hdr - list header; hdr->first and hdr->last are updated
**
** An empty list (hdr->first == NULL) gets the new node as both first and
** last; otherwise the node is linked after the current last node.
**
** NOTE(review): the text argument is declared on a continuation line of
** the signature.  cdata_handler() passes a pool-allocated copy, and the
** string is presumably stored by pointer rather than copied -- confirm.
*/
476 API_EXPORT(void) ap_text_append(ap_pool_t * p, ap_text_header *hdr,
479 ap_text *t = ap_palloc(p, sizeof(*t));
484 if (hdr->first == NULL) {
485 /* no text elements yet */
486 hdr->first = hdr->last = t;
489 /* append to the last text element */
496 /* ---------------------------------------------------------------
498 ** XML UTILITY FUNCTIONS
502 ** ap_xml_quote_string: quote an XML string
504 ** Replace '<', '>', and '&' with '&lt;', '&gt;', and '&amp;'.
505 ** If quotes is true, then replace '"' with '&quot;'.
507 ** quotes is typically set to true for XML strings that will occur within
508 ** double quotes -- attribute values.
/*
** Works in two passes over "s":
**   1. measure: count the characters (len) and the additional bytes the
**      entity replacements will need (extra);
**   2. emit: allocate len + extra + 1 bytes from "p" and copy the string
**      while substituting the entities.
**
** p      - pool for the result
** s      - NUL-terminated input string
** quotes - when non-zero, '"' is escaped as well
*/
510 API_EXPORT(const char *) ap_xml_quote_string(ap_pool_t *p, const char *s,
520 for (scan = s; (c = *scan) != '\0'; ++scan, ++len) {
521 if (c == '<' || c == '>')
522 extra += 3; /* &lt; or &gt; : four characters replace one */
524 extra += 4; /* &amp; : five characters replace one */
525 else if (quotes && c == '"')
526 extra += 5; /* &quot; : six characters replace one */
/* original length + growth + terminating NUL */
533 qstr = ap_palloc(p, len + extra + 1);
534 for (scan = s, qscan = qstr; (c = *scan) != '\0'; ++scan) {
554 else if (quotes && c == '"') {
/*
** How many characters does the decimal form of the given integer need?
**
** Used when sizing the "ns%d" prefixes.  The result is 1 for any value
** below 10 (negative values included; no sign is counted) and is capped
** at 10.
**
** The computation lives in a function so that the argument is evaluated
** exactly once; AP_XML_NS_LEN() remains available under its original name.
*/
static int ap_xml_ns_len(int ns)
{
    int digits = 1;

    /* strip one decimal digit per iteration; stop at the 10-digit cap */
    while (ns >= 10 && digits < 10) {
        ns /= 10;
        ++digits;
    }
    return digits;
}
#define AP_XML_NS_LEN(ns) ap_xml_ns_len(ns)
/*
** text_size: total number of characters held by a list of text nodes.
**
** t - first node of the list; NULL yields an empty sum
**
** Adds up strlen() of every node's text; no terminator is counted.
**
** NOTE(review): the return type is int although strlen() yields size_t and
** elem_size() adds the result to a size_t total -- consider size_t.
*/
577 static int text_size(const ap_text *t)
581 for (; t; t = t->next)
582 size += strlen(t->text);
/*
** elem_size: number of characters write_elem() will need for "elem".
**
** elem       - element to measure
** style      - AP_XML_X2T_FULL, AP_XML_X2T_FULL_NS_LANG or
**              AP_XML_X2T_LANG_INNER are the styles tested here
** namespaces - URI array; dereferenced only for AP_XML_X2T_FULL_NS_LANG
** ns_map     - optional remapping of element namespace indexes; NULL
**              means the index is used as-is
**
** For the two FULL styles the total covers the start and end tags, each
** attribute, and an xml:lang attribute where one is emitted;
** AP_XML_X2T_FULL_NS_LANG additionally counts one xmlns:ns%d declaration
** per entry of "namespaces".  For AP_XML_X2T_LANG_INNER the total starts
** with the lang string plus one terminator byte.  In every case the
** element's leading text, each child (measured recursively with
** AP_XML_X2T_FULL) and the text following each child are added.
**
** The terminating NUL of the whole buffer is not included;
** ap_xml_to_text() adds one.
**
** NOTE(review): element namespace indexes go through ns_map, but the
** attribute branch measures attr->ns directly.  write_elem() is
** consistent with that, so the size matches what is written, but confirm
** whether attributes are meant to be remapped as well.
*/
586 static size_t elem_size(const ap_xml_elem *elem, int style,
587 ap_array_header_t *namespaces, int *ns_map)
591 if (style == AP_XML_X2T_FULL || style == AP_XML_X2T_FULL_NS_LANG) {
592 const ap_xml_attr *attr;
596 if (style == AP_XML_X2T_FULL_NS_LANG) {
600 ** The outer element will contain xmlns:ns%d="%s" attributes
601 ** and an xml:lang attribute, if applicable.
604 for (i = namespaces->nelts; i--;) {
605 /* compute size of: ' xmlns:ns%d="%s"' */
606 size += (9 + AP_XML_NS_LEN(i) + 2 +
607 strlen(AP_XML_GET_URI_ITEM(namespaces, i)) + 1);
610 if (elem->lang != NULL) {
611 /* compute size of: ' xml:lang="%s"' */
612 size += 11 + strlen(elem->lang) + 1;
616 if (elem->ns == AP_XML_NS_NONE) {
617 /* compute size of: <%s> */
618 size += 1 + strlen(elem->name) + 1;
621 int ns = ns_map ? ns_map[elem->ns] : elem->ns;
623 /* compute size of: <ns%d:%s> */
624 size += 3 + AP_XML_NS_LEN(ns) + 1 + strlen(elem->name) + 1;
627 if (AP_XML_ELEM_IS_EMPTY(elem)) {
628 /* insert a closing "/" */
633 * two of above plus "/":
634 * <ns%d:%s> ... </ns%d:%s>
640 for (attr = elem->attr; attr; attr = attr->next) {
641 if (attr->ns == AP_XML_NS_NONE) {
642 /* compute size of: ' %s="%s"' */
643 size += 1 + strlen(attr->name) + 2 + strlen(attr->value) + 1;
646 /* compute size of: ' ns%d:%s="%s"' */
647 size += 3 + AP_XML_NS_LEN(attr->ns) + 1 + strlen(attr->name) + 2 + strlen(attr->value) + 1;
652 ** If the element has an xml:lang value that is *different* from
653 ** its parent, then add the thing in: ' xml:lang="%s"'.
655 ** NOTE: we take advantage of the pointer equality established by
656 ** the parsing for "inheriting" the xml:lang values from parents.
658 if (elem->lang != NULL &&
659 (elem->parent == NULL || elem->lang != elem->parent->lang)) {
660 size += 11 + strlen(elem->lang) + 1;
663 else if (style == AP_XML_X2T_LANG_INNER) {
665 * This style prepends the xml:lang value plus a null terminator.
666 * If a lang value is not present, then we insert a null term.
668 size = elem->lang ? strlen(elem->lang) + 1 : 1;
673 size += text_size(elem->first_cdata.first);
/* "elem" is reused as the cursor over the children from here on */
675 for (elem = elem->first_child; elem; elem = elem->next) {
676 /* the size of the child element plus the CDATA that follows it */
677 size += (elem_size(elem, AP_XML_X2T_FULL, NULL, ns_map) +
678 text_size(elem->following_cdata.first));
/*
** write_text: copy every string of a text list to the output position.
**
** s - destination; the caller must have reserved enough room
** t - first node of the list; NULL copies nothing
**
** Each node's text is copied with memcpy() without its terminating NUL.
**
** NOTE(review): callers assign the result back to their write pointer, so
** the return value is presumably the position just past the last byte
** written -- confirm against the full body.
*/
684 static char *write_text(char *s, const ap_text *t)
686 for (; t; t = t->next) {
687 size_t len = strlen(t->text);
688 memcpy(s, t->text, len);
/*
** write_elem: serialize an element, its text and its children into "s".
**
** s          - destination buffer position
** elem       - element to serialize
** style      - AP_XML_X2T_FULL, AP_XML_X2T_FULL_NS_LANG or
**              AP_XML_X2T_LANG_INNER are the styles tested here
** namespaces - URI array; dereferenced only for AP_XML_X2T_FULL_NS_LANG
** ns_map     - optional remapping of element namespace indexes; NULL
**              means the index is used as-is
**
** Output for the FULL styles: the start tag ("<name" or "<ns%d:name"),
** the attributes, an xml:lang attribute when the element has a lang and
** either the style is AP_XML_X2T_FULL_NS_LANG, the element has no parent,
** or its lang pointer differs from its parent's, then (FULL_NS_LANG only)
** one xmlns:ns%d declaration per namespace, followed by the content and
** the end tag.  For AP_XML_X2T_LANG_INNER the lang string is copied
** first.  Children are always written with AP_XML_X2T_FULL.
**
** Every write is an unbounded sprintf()/memcpy(): the buffer must have
** been sized with elem_size() for the same arguments, as ap_xml_to_text()
** does.
**
** NOTE(review): element prefixes are remapped through ns_map, but
** attribute prefixes are written from attr->ns directly.  elem_size()
** measures them the same way; confirm whether attributes should be
** remapped too.
*/
694 static char *write_elem(char *s, const ap_xml_elem *elem, int style,
695 ap_array_header_t *namespaces, int *ns_map)
697 const ap_xml_elem *child;
701 if (style == AP_XML_X2T_FULL || style == AP_XML_X2T_FULL_NS_LANG) {
702 int empty = AP_XML_ELEM_IS_EMPTY(elem);
703 const ap_xml_attr *attr;
705 if (elem->ns == AP_XML_NS_NONE) {
706 len = sprintf(s, "<%s", elem->name);
709 ns = ns_map ? ns_map[elem->ns] : elem->ns;
710 len = sprintf(s, "<ns%d:%s", ns, elem->name);
714 for (attr = elem->attr; attr; attr = attr->next) {
715 if (attr->ns == AP_XML_NS_NONE)
716 len = sprintf(s, " %s=\"%s\"", attr->name, attr->value);
718 len = sprintf(s, " ns%d:%s=\"%s\"", attr->ns, attr->name, attr->value);
722 /* add the xml:lang value if necessary */
723 if (elem->lang != NULL &&
724 (style == AP_XML_X2T_FULL_NS_LANG ||
725 elem->parent == NULL ||
726 elem->lang != elem->parent->lang)) {
727 len = sprintf(s, " xml:lang=\"%s\"", elem->lang);
731 /* add namespace definitions, if required */
732 if (style == AP_XML_X2T_FULL_NS_LANG) {
735 for (i = namespaces->nelts; i--;) {
736 len = sprintf(s, " xmlns:ns%d=\"%s\"", i,
737 AP_XML_GET_URI_ITEM(namespaces, i));
742 /* no more to do. close it up and go. */
752 else if (style == AP_XML_X2T_LANG_INNER) {
753 /* prepend the xml:lang value */
754 if (elem->lang != NULL) {
755 len = strlen(elem->lang);
756 memcpy(s, elem->lang, len);
762 s = write_text(s, elem->first_cdata.first);
764 for (child = elem->first_child; child; child = child->next) {
765 s = write_elem(s, child, AP_XML_X2T_FULL, NULL, ns_map);
766 s = write_text(s, child->following_cdata.first);
769 if (style == AP_XML_X2T_FULL || style == AP_XML_X2T_FULL_NS_LANG) {
770 if (elem->ns == AP_XML_NS_NONE) {
771 len = sprintf(s, "</%s>", elem->name);
774 ns = ns_map ? ns_map[elem->ns] : elem->ns;
775 len = sprintf(s, "</ns%d:%s>", ns, elem->name);
/*
** ap_xml_quote_elem: XML-quote all text stored in an element subtree.
**
** p    - pool passed to ap_xml_quote_string() for the quoted strings
** elem - root of the subtree; modified in place
**
** Each text node in elem->first_cdata and elem->following_cdata is
** replaced by its quoted form (quotes = 0), each attribute value by its
** quoted form with '"' escaped as well (quotes = 1), and the function
** then recurses into every child element.
**
** NOTE(review): nothing visible here records that an element has already
** been quoted, so a second call on the same tree would presumably quote
** the text again -- confirm with callers.
*/
783 API_EXPORT(void) ap_xml_quote_elem(ap_pool_t *p, ap_xml_elem *elem)
786 ap_xml_attr *scan_attr;
787 ap_xml_elem *scan_elem;
789 /* convert the element's text */
790 for (scan_txt = elem->first_cdata.first;
792 scan_txt = scan_txt->next) {
793 scan_txt->text = ap_xml_quote_string(p, scan_txt->text, 0);
795 for (scan_txt = elem->following_cdata.first;
797 scan_txt = scan_txt->next) {
798 scan_txt->text = ap_xml_quote_string(p, scan_txt->text, 0);
801 /* convert the attribute values */
802 for (scan_attr = elem->attr;
804 scan_attr = scan_attr->next) {
805 scan_attr->value = ap_xml_quote_string(p, scan_attr->value, 1);
808 /* convert the child elements */
809 for (scan_elem = elem->first_child;
811 scan_elem = scan_elem->next) {
812 ap_xml_quote_elem(p, scan_elem);
/*
** ap_xml_to_text: serialize an element into a newly allocated string.
**
** p          - pool the buffer is allocated from
** elem       - element to serialize
** style      - one of the AP_XML_X2T_* styles (see elem_size/write_elem)
** namespaces - URI array, passed through to elem_size()/write_elem()
** ns_map     - optional namespace index remapping, passed through as well
** pbuf       - output parameter for the buffer
** psize      - output parameter for the size
**
** The buffer is sized with elem_size() plus one byte for a terminator and
** then filled by write_elem() using the same arguments.
**
** NOTE(review): the stores to *pbuf and *psize are not among the
** statements shown here; confirm whether either pointer may be NULL and
** whether *psize includes the terminator.
*/
816 /* convert an element to a text string */
817 API_EXPORT(void) ap_xml_to_text(ap_pool_t * p, const ap_xml_elem *elem,
818 int style, ap_array_header_t *namespaces,
819 int *ns_map, const char **pbuf, size_t *psize)
821 /* get the exact size, plus a null terminator */
822 size_t size = elem_size(elem, style, namespaces, ns_map) + 1;
823 char *s = ap_palloc(p, size);
825 (void) write_elem(s, elem, style, namespaces, ns_map);
/*
** ap_xml_empty_elem: build an empty-element tag for "elem".
**
** p    - pool the string is allocated from (via ap_psprintf)
** elem - element whose name and namespace index are used
**
** Returns "<name/>" when the element is in AP_XML_NS_NONE and
** "<ns%d:name/>" otherwise, each followed by DEBUG_CR.  Attributes, text
** and children are not emitted, and elem->ns is used without any
** namespace remapping.
*/
833 API_EXPORT(const char *) ap_xml_empty_elem(ap_pool_t * p,
834 const ap_xml_elem *elem)
836 if (elem->ns == AP_XML_NS_NONE) {
838 * The prefix (xml...) is already within the prop name, or
839 * the element simply has no prefix.
841 return ap_psprintf(p, "<%s/>" DEBUG_CR, elem->name);
844 return ap_psprintf(p, "<ns%d:%s/>" DEBUG_CR, elem->ns, elem->name);
/*
** ap_xml_insert_uri: look up a namespace URI, adding it when absent.
**
** uri_array - array of URI string pointers
**
** The array is scanned from the last entry down to the first, comparing
** with strcmp().  When nothing matches, the URI pointer itself is pushed
** (the string is not copied, so it must outlive the array) and the index
** of the new last entry is returned.
**
** NOTE(review): the URI parameter is declared on a continuation line of
** the signature, and the return for a match is not shown here --
** presumably the matching index "i"; confirm.
*/
847 /* return the URI's (existing) index, or insert it and return a new index */
848 API_EXPORT(int) ap_xml_insert_uri(ap_array_header_t *uri_array,
854 for (i = uri_array->nelts; i--;) {
855 if (strcmp(uri, AP_XML_GET_URI_ITEM(uri_array, i)) == 0)
859 pelt = ap_push_array(uri_array);
860 *pelt = uri; /* assume uri is const or in a pool */
861 return uri_array->nelts - 1;