1 /* ====================================================================
2 * The Apache Software License, Version 1.1
4 * Copyright (c) 2000 The Apache Software Foundation. All rights
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
19 * 3. The end-user documentation included with the redistribution,
20 * if any, must include the following acknowledgment:
21 * "This product includes software developed by the
22 * Apache Software Foundation (http://www.apache.org/)."
23 * Alternately, this acknowledgment may appear in the software itself,
24 * if and wherever such third-party acknowledgments normally appear.
26 * 4. The names "Apache" and "Apache Software Foundation" must
27 * not be used to endorse or promote products derived from this
28 * software without prior written permission. For written
29 * permission, please contact apache@apache.org.
31 * 5. Products derived from this software may not be called "Apache",
32 * nor may "Apache" appear in their name, without prior written
33 * permission of the Apache Software Foundation.
35 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
36 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
37 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
38 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
41 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
42 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
43 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
44 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
45 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47 * ====================================================================
49 * This software consists of voluntary contributions made by many
50 * individuals on behalf of the Apache Software Foundation. For more
51 * information on the Apache Software Foundation, please see
52 * <http://www.apache.org/>.
56 ** DAV extension module for Apache 2.0.*
57 ** - XML parser for the body of a request
60 /* James Clark's Expat parser */
64 #include "http_protocol.h"
66 #include "http_core.h"
67 #include "apr_strings.h"
/* line terminator appended after generated tags (used by ap_xml_empty_elem) */
72 #define DEBUG_CR "\r\n"
74 #define AP_XML_READ_BLOCKSIZE 2048 /* size of the buffer handed to ap_get_client_block() */
76 /* errors related to namespace processing */
/* returned by find_prefix() when no enclosing scope declares the prefix */
77 #define AP_XML_NS_ERROR_UNKNOWN_PREFIX (AP_XML_NS_ERROR_BASE)
/*
 * Test for a name that begins with the reserved prefix [Xx][Mm][Ll].
 *
 * `name` must point at a NUL-terminated string. The tests short-circuit
 * on the first mismatch (a terminator never matches), so a string shorter
 * than three characters is never read past its terminator.
 *
 * The argument is parenthesized so that pointer expressions such as
 * `p + 1` or `&s[2]` expand correctly. It is still evaluated more than
 * once, so do not pass an expression with side effects.
 */
#define AP_XML_NS_IS_RESERVED(name) \
    ( ((name)[0] == 'X' || (name)[0] == 'x') && \
      ((name)[1] == 'M' || (name)[1] == 'm') && \
      ((name)[2] == 'L' || (name)[2] == 'l') )
86 /* parsing context: handed to the parser callbacks as their user data */
87 typedef struct ap_xml_ctx {
88 ap_xml_doc *doc; /* the doc we're parsing */
89 apr_pool_t *p; /* the pool we allocate from */
90 ap_xml_elem *cur_elem; /* innermost element still open; NULL until the root is seen */
92 int error; /* an error has occurred; once non-zero the callbacks stop processing */
93 /* errors may be AP_XML_NS_ERROR_* or other private errors which will
94 be defined here (none yet) */
98 /* struct for scoping namespace declarations */
/* One entry per xmlns declaration found on an element; start_handler()
   pushes each new entry onto the front of that element's ns_scope list. */
99 typedef struct ap_xml_ns_scope {
100 const char *prefix; /* prefix used for this ns (empty string for the default namespace) */
101 int ns; /* index into namespace table */
102 int emptyURI; /* the namespace URI is the empty string */
103 struct ap_xml_ns_scope *next; /* next scoped namespace */
107 /* return namespace table index for a given prefix */
/*
 * ctx    - parsing context; the search starts at ctx->cur_elem
 * prefix - NUL-terminated prefix to resolve ("" asks for the default
 *          namespace)
 *
 * Each element from the current one up through its parents is examined,
 * and the first ns_scope entry whose prefix compares equal wins.
 *
 * Visible results:
 *   AP_XML_NS_NONE                  - the matching scope has emptyURI set,
 *                                     or nothing matched and prefix is ""
 *   AP_XML_NS_ERROR_UNKNOWN_PREFIX  - a non-empty prefix was never declared
 * NOTE(review): the return for an ordinary (non-empty URI) match is not
 * visible in this listing; presumably it is ns_scope->ns -- confirm.
 */
108 static int find_prefix(ap_xml_ctx *ctx, const char *prefix)
110 ap_xml_elem *elem = ctx->cur_elem;
113 ** Walk up the tree, looking for a namespace scope that defines this
116 for (; elem; elem = elem->parent) {
117 ap_xml_ns_scope *ns_scope = elem->ns_scope;
/* the initializer above is repeated by the loop's init clause below */
119 for (ns_scope = elem->ns_scope; ns_scope; ns_scope = ns_scope->next) {
120 if (strcmp(prefix, ns_scope->prefix) == 0) {
121 if (ns_scope->emptyURI) {
123 ** It is possible to set the default namespace to an
124 ** empty URI string; this resets the default namespace
125 ** to mean "no namespace." We just found the prefix
126 ** refers to an empty URI, so return "no namespace."
128 return AP_XML_NS_NONE;
137 * If the prefix is empty (""), this means that a prefix was not
138 * specified in the element/attribute. The search that was performed
139 * just above did not locate a default namespace URI (which is stored
140 * into ns_scope with an empty prefix). This means the element/attribute
141 * has "no namespace". We have a reserved value for this.
143 if (*prefix == '\0') {
144 return AP_XML_NS_NONE;
/* a non-empty prefix that no enclosing element declared */
148 return AP_XML_NS_ERROR_UNKNOWN_PREFIX;
/*
 * start_handler: element-start callback (installed together with
 * end_handler through XML_SetElementHandler in ap_xml_parse_input).
 *
 * userdata - the ap_xml_ctx for this parse
 * name     - element name as delivered by the parser (may be "prefix:local")
 * attrs    - flat name/value sequence; consumed two entries at a time
 *
 * Visible steps, in order:
 *   1. allocate a zeroed ap_xml_elem from ctx->p and duplicate the name;
 *   2. copy every attribute into a list on the element (reverse order);
 *   3. link the element under ctx->cur_elem (or make it the document root)
 *      and make it the current element;
 *   4. turn attributes whose name starts with "xmlns" into ns_scope entries
 *      and unlink them; move "xml:lang" into elem->lang and unlink it;
 *   5. inherit the parent's lang pointer when the element has none;
 *   6. resolve the element's and each remaining attribute's namespace with
 *      find_prefix(), recording a failure in ctx->error.
 */
151 static void start_handler(void *userdata, const char *name, const char **attrs)
153 ap_xml_ctx *ctx = userdata;
161 /* punt once we find an error */
165 elem = apr_pcalloc(ctx->p, sizeof(*elem));
167 /* prep the element */
168 elem->name = elem_name = apr_pstrdup(ctx->p, name);
170 /* fill in the attributes (note: ends up in reverse order) */
172 attr = apr_palloc(ctx->p, sizeof(*attr));
173 attr->name = apr_pstrdup(ctx->p, *attrs++);
174 attr->value = apr_pstrdup(ctx->p, *attrs++);
175 attr->next = elem->attr;
179 /* hook the element into the tree */
180 if (ctx->cur_elem == NULL) {
181 /* no current element; this also becomes the root */
182 ctx->cur_elem = ctx->doc->root = elem;
185 /* this element appeared within the current elem */
186 elem->parent = ctx->cur_elem;
188 /* set up the child/sibling links */
189 if (elem->parent->last_child == NULL) {
190 /* no first child either */
191 elem->parent->first_child = elem->parent->last_child = elem;
194 /* hook onto the end of the parent's children */
195 elem->parent->last_child->next = elem;
196 elem->parent->last_child = elem;
199 /* this element is now the current element */
200 ctx->cur_elem = elem;
203 /* scan the attributes for namespace declarations */
/* "prev" trails "attr" so a matched attribute can be unlinked in place */
204 for (prev = NULL, attr = elem->attr;
207 if (strncmp(attr->name, "xmlns", 5) == 0) {
/* prefix points just past "xmlns": at ':' for xmlns:foo, at the terminator for xmlns */
208 const char *prefix = &attr->name[5];
209 ap_xml_ns_scope *ns_scope;
211 /* test for xmlns:foo= form and xmlns= form */
214 else if (*prefix != '\0') {
215 /* advance "prev" since "attr" is still present */
220 /* quote the URI before we ever start working with it */
221 quoted = ap_xml_quote_string(ctx->p, attr->value, 1);
223 /* build and insert the new scope */
224 ns_scope = apr_pcalloc(ctx->p, sizeof(*ns_scope));
225 ns_scope->prefix = prefix;
226 ns_scope->ns = ap_xml_insert_uri(ctx->doc->namespaces, quoted);
227 ns_scope->emptyURI = *quoted == '\0';
228 ns_scope->next = elem->ns_scope;
229 elem->ns_scope = ns_scope;
231 /* remove this attribute from the element */
233 elem->attr = attr->next;
235 prev->next = attr->next;
237 /* Note: prev will not be advanced since we just removed "attr" */
239 else if (strcmp(attr->name, "xml:lang") == 0) {
240 /* save away the language (in quoted form) */
241 elem->lang = ap_xml_quote_string(ctx->p, attr->value, 1);
243 /* remove this attribute from the element */
245 elem->attr = attr->next;
247 prev->next = attr->next;
249 /* Note: prev will not be advanced since we just removed "attr" */
252 /* advance "prev" since "attr" is still present */
258 ** If an xml:lang attribute didn't exist (lang==NULL), then copy the
259 ** language from the parent element (if present).
261 ** NOTE: elem_size() *depends* upon this pointer equality.
263 if (elem->lang == NULL && elem->parent != NULL)
264 elem->lang = elem->parent->lang;
266 /* adjust the element's namespace */
267 colon = ap_strchr(elem_name, ':');
270 * The element is using the default namespace, which will always
271 * be found. Either it will be "no namespace", or a default
272 * namespace URI has been specified at some point.
274 elem->ns = find_prefix(ctx, "");
/* names starting with [Xx][Mm][Ll] keep their full "prefix:local" text */
276 else if (AP_XML_NS_IS_RESERVED(elem->name)) {
277 elem->ns = AP_XML_NS_NONE;
/* NOTE(review): find_prefix() compares whole strings, so the name is
   presumably cut at the colon on a line not shown here -- confirm. */
281 elem->ns = find_prefix(ctx, elem->name);
282 elem->name = colon + 1;
284 if (AP_XML_NS_IS_ERROR(elem->ns)) {
285 ctx->error = elem->ns;
290 /* adjust all remaining attributes' namespaces */
291 for (attr = elem->attr; attr; attr = attr->next) {
293 * ap_xml_attr defines this as "const" but we dup'd it, so we
294 * know that we can change it. a bit hacky, but the existing
295 * structure def is best.
297 char *attr_name = (char *)attr->name;
299 colon = ap_strchr(attr_name, ':');
302 * Attributes do NOT use the default namespace. Therefore,
303 * we place them into the "no namespace" category.
305 attr->ns = AP_XML_NS_NONE;
307 else if (AP_XML_NS_IS_RESERVED(attr->name)) {
308 attr->ns = AP_XML_NS_NONE;
312 attr->ns = find_prefix(ctx, attr->name);
313 attr->name = colon + 1;
315 if (AP_XML_NS_IS_ERROR(attr->ns)) {
316 ctx->error = attr->ns;
/*
 * end_handler: element-end callback. Makes the parent of the current
 * element the current element again. `name` is not used by the visible
 * code.
 */
323 static void end_handler(void *userdata, const char *name)
325 ap_xml_ctx *ctx = userdata;
327 /* punt once we find an error */
331 /* pop up one level */
332 ctx->cur_elem = ctx->cur_elem->parent;
/*
 * cdata_handler: character-data callback.
 *
 * data/len - the text run; it is copied with apr_pstrndup(), which takes
 *            the length explicitly, so `data` need not be NUL-terminated.
 *
 * The copy is appended to the current element's first_cdata while the
 * element has no children, otherwise to the following_cdata of its last
 * child.
 */
335 static void cdata_handler(void *userdata, const char *data, int len)
337 ap_xml_ctx *ctx = userdata;
342 /* punt once we find an error */
346 elem = ctx->cur_elem;
347 s = apr_pstrndup(ctx->p, data, len);
349 if (elem->last_child == NULL) {
350 /* no children yet. this cdata follows the start tag */
351 hdr = &elem->first_cdata;
354 /* child elements exist. this cdata follows the last child. */
355 hdr = &elem->last_child->following_cdata;
358 ap_text_append(ctx->p, hdr, s);
/*
 * ap_xml_parse_input: read the body of request `r` and parse it as XML.
 *
 * The body is pulled in blocks of AP_XML_READ_BLOCKSIZE bytes with
 * ap_get_client_block() and fed to an Expat parser whose callbacks
 * (start_handler, end_handler, cdata_handler) build the tree in ctx.doc.
 * The namespace table is seeded with "DAV:" before parsing, so that URI
 * is always at index 0.
 *
 * Returns: every error path visible here returns HTTP_BAD_REQUEST after
 * logging; a failure of ap_setup_client_block() is detected first.
 * NOTE(review): the success return and the store through `pdoc` are not
 * visible in this listing -- confirm before relying on them.
 */
361 API_EXPORT(int) ap_xml_parse_input(request_rec * r, ap_xml_doc **pdoc)
368 if ((result = ap_setup_client_block(r, REQUEST_CHUNKED_DECHUNK)) != OK)
371 if (r->remaining == 0) {
377 ctx.doc = apr_pcalloc(ctx.p, sizeof(*ctx.doc));
379 ctx.doc->namespaces = apr_make_array(ctx.p, 5, sizeof(const char *));
380 ap_xml_insert_uri(ctx.doc->namespaces, "DAV:");
382 /* ### we should get the encoding from Content-Encoding */
383 parser = XML_ParserCreate(NULL);
384 if (parser == NULL) {
385 /* ### anything better to do? */
/* NOTE(review): this writes to stderr while every other failure in this
   function goes through ap_log_rerror() -- consider making it consistent */
386 fprintf(stderr, "Ouch! XML_ParserCreate() failed!\n");
390 XML_SetUserData(parser, (void *) &ctx);
391 XML_SetElementHandler(parser, start_handler, end_handler);
392 XML_SetCharacterDataHandler(parser, cdata_handler);
394 if (ap_should_client_block(r)) {
399 size_t total_read = 0;
400 size_t limit_xml_body = ap_get_limit_xml_body(r);
402 /* allocate our working buffer */
403 buffer = apr_palloc(r->pool, AP_XML_READ_BLOCKSIZE);
405 /* read the body, stuffing it into the parser */
406 while ((len = ap_get_client_block(r, buffer, AP_XML_READ_BLOCKSIZE)) > 0) {
/* a limit of 0 disables the check; NOTE(review): the update of total_read
   is not visible here -- confirm it happens before this comparison */
408 if (limit_xml_body && total_read > limit_xml_body) {
409 ap_log_rerror(APLOG_MARK, APLOG_ERR | APLOG_NOERRNO, 0, r,
410 "XML request body is larger than the configured "
411 "limit of %lu", (unsigned long)limit_xml_body);
/* final argument 0: more input may follow this block */
415 rv = XML_Parse(parser, buffer, len, 0);
420 /* ap_get_client_block() has logged an error */
424 /* tell the parser that we're done */
/* zero-length call with the final flag set */
425 rv = XML_Parse(parser, &end, 0, 1);
430 XML_ParserFree(parser);
/* errors recorded by the callbacks in ctx.error are reported below */
434 case AP_XML_NS_ERROR_UNKNOWN_PREFIX:
435 ap_log_rerror(APLOG_MARK, APLOG_ERR | APLOG_NOERRNO, 0, r,
436 "An undefined namespace prefix was used.");
440 ap_log_rerror(APLOG_MARK, APLOG_ERR | APLOG_NOERRNO, 0, r,
441 "There was an error within the XML request body.");
445 /* Apache will supply a default error, plus the error log above. */
446 return HTTP_BAD_REQUEST;
449 /* ### assert: ctx.cur_elem == NULL */
/* the parser's own error code is read before the parser is freed */
457 enum XML_Error err = XML_GetErrorCode(parser);
459 /* ### fix this error message (default vs special) */
460 ap_log_rerror(APLOG_MARK, APLOG_ERR | APLOG_NOERRNO, 0, r,
461 "XML parser error code: %s (%d).",
462 XML_ErrorString(err), err);
464 XML_ParserFree(parser);
466 /* Apache will supply a default error, plus the error log above. */
467 return HTTP_BAD_REQUEST;
471 XML_ParserFree(parser);
473 /* Apache will supply a default error, plus whatever was logged. */
474 return HTTP_BAD_REQUEST;
/*
 * ap_text_append: add one text item to the end of the list headed by
 * `hdr`. The list node is allocated from pool `p`.
 * NOTE(review): the third parameter and the node's field assignments are
 * not visible in this listing; callers here pass a pool-allocated string.
 */
477 API_EXPORT(void) ap_text_append(apr_pool_t * p, ap_text_header *hdr,
480 ap_text *t = apr_palloc(p, sizeof(*t));
485 if (hdr->first == NULL) {
486 /* no text elements yet */
487 hdr->first = hdr->last = t;
490 /* append to the last text element */
497 /* ---------------------------------------------------------------
499 ** XML UTILITY FUNCTIONS
503 ** ap_xml_quote_string: quote an XML string
505 ** Replace '<', '>', and '&' with '&lt;', '&gt;', and '&amp;'.
506 ** If quotes is true, then replace '"' with '&quot;'.
508 ** quotes is typically set to true for XML strings that will occur within
509 ** double quotes -- attribute values.
/*
 * Two passes over `s`: the first counts the characters (len) and the
 * additional bytes the replacements need (extra); the second writes the
 * result into a buffer of len + extra + 1 bytes allocated from `p`.
 * NOTE(review): what is returned when nothing needs quoting is not
 * visible in this listing -- confirm whether `s` itself is handed back.
 */
511 API_EXPORT(const char *) ap_xml_quote_string(apr_pool_t *p, const char *s,
521 for (scan = s; (c = *scan) != '\0'; ++scan, ++len) {
522 if (c == '<' || c == '>')
523 extra += 3; /* &lt; or &gt; : 4 bytes replace 1 */
525 extra += 4; /* &amp; : 5 bytes replace 1 */
526 else if (quotes && c == '"')
527 extra += 5; /* &quot; : 6 bytes replace 1 */
/* room for the original characters, the growth, and the terminator */
534 qstr = apr_palloc(p, len + extra + 1);
535 for (scan = s, qscan = qstr; (c = *scan) != '\0'; ++scan) {
555 else if (quotes && c == '"') {
/*
 * Number of characters "%d" produces for a namespace index.
 * Values below 10 (including any negative value) yield 1; values of
 * 1000000000 and above yield 10. The argument is evaluated several times.
 */
#define AP_XML_NS_LEN(ns) \
    ((ns) >= 1000000000 ? 10 : \
     (ns) >= 100000000  ? 9  : \
     (ns) >= 10000000   ? 8  : \
     (ns) >= 1000000    ? 7  : \
     (ns) >= 100000     ? 6  : \
     (ns) >= 10000      ? 5  : \
     (ns) >= 1000       ? 4  : \
     (ns) >= 100        ? 3  : \
     (ns) >= 10         ? 2  : 1)
578 static int text_size(const ap_text *t)
582 for (; t; t = t->next)
583 size += strlen(t->text);
/*
 * elem_size: number of bytes write_elem() will need to serialize `elem`
 * in the given style, not counting a final terminator (ap_xml_to_text
 * adds one). Every literal count below mirrors a format string in
 * write_elem(), so the two functions must be changed together.
 *
 * elem       - element to measure; children are measured recursively
 * style      - AP_XML_X2T_FULL, AP_XML_X2T_FULL_NS_LANG or
 *              AP_XML_X2T_LANG_INNER are handled explicitly
 * namespaces - URI table; only dereferenced for AP_XML_X2T_FULL_NS_LANG
 * ns_map     - optional remapping of namespace indices (may be NULL)
 */
587 static size_t elem_size(const ap_xml_elem *elem, int style,
588 apr_array_header_t *namespaces, int *ns_map)
592 if (style == AP_XML_X2T_FULL || style == AP_XML_X2T_FULL_NS_LANG) {
593 const ap_xml_attr *attr;
597 if (style == AP_XML_X2T_FULL_NS_LANG) {
601 ** The outer element will contain xmlns:ns%d="%s" attributes
602 ** and an xml:lang attribute, if applicable.
/* counts down from nelts-1 to 0; the test uses i before decrementing it */
605 for (i = namespaces->nelts; i--;) {
606 /* compute size of: ' xmlns:ns%d="%s"' */
607 size += (9 + AP_XML_NS_LEN(i) + 2 +
608 strlen(AP_XML_GET_URI_ITEM(namespaces, i)) + 1);
611 if (elem->lang != NULL) {
612 /* compute size of: ' xml:lang="%s"' */
613 size += 11 + strlen(elem->lang) + 1;
617 if (elem->ns == AP_XML_NS_NONE) {
618 /* compute size of: <%s> */
619 size += 1 + strlen(elem->name) + 1;
622 int ns = ns_map ? ns_map[elem->ns] : elem->ns;
624 /* compute size of: <ns%d:%s> */
625 size += 3 + AP_XML_NS_LEN(ns) + 1 + strlen(elem->name) + 1;
628 if (AP_XML_ELEM_IS_EMPTY(elem)) {
629 /* insert a closing "/" */
634 * two of above plus "/":
635 * <ns%d:%s> ... </ns%d:%s>
641 for (attr = elem->attr; attr; attr = attr->next) {
642 if (attr->ns == AP_XML_NS_NONE) {
643 /* compute size of: ' %s="%s"' */
644 size += 1 + strlen(attr->name) + 2 + strlen(attr->value) + 1;
647 /* compute size of: ' ns%d:%s="%s"' */
/* NOTE(review): attr->ns is used as-is, whereas the element's own index
   goes through ns_map above. write_elem() does the same, so the sizes
   agree -- confirm whether attributes were meant to be remapped too, and
   change both functions together if so. */
648 size += 3 + AP_XML_NS_LEN(attr->ns) + 1 + strlen(attr->name) + 2 + strlen(attr->value) + 1;
653 ** If the element has an xml:lang value that is *different* from
654 ** its parent, then add the thing in: ' xml:lang="%s"'.
656 ** NOTE: we take advantage of the pointer equality established by
657 ** the parsing for "inheriting" the xml:lang values from parents.
/* compares pointers, not strings: an inherited lang is the same pointer */
659 if (elem->lang != NULL &&
660 (elem->parent == NULL || elem->lang != elem->parent->lang)) {
661 size += 11 + strlen(elem->lang) + 1;
664 else if (style == AP_XML_X2T_LANG_INNER) {
666 * This style prepends the xml:lang value plus a null terminator.
667 * If a lang value is not present, then we insert a null term.
669 size = elem->lang ? strlen(elem->lang) + 1 : 1;
674 size += text_size(elem->first_cdata.first);
/* children are always measured in AP_XML_X2T_FULL style with no namespace table */
676 for (elem = elem->first_child; elem; elem = elem->next) {
677 /* the size of the child element plus the CDATA that follows it */
678 size += (elem_size(elem, AP_XML_X2T_FULL, NULL, ns_map) +
679 text_size(elem->following_cdata.first));
/*
 * write_text: copy the text of every item in chain `t` into the buffer
 * at `s`. Each item is copied with memcpy() for exactly strlen() bytes,
 * so no terminator is written between items.
 * NOTE(review): the advance of `s` and the returned pointer are not
 * visible in this listing; callers use the result as the next write
 * position.
 */
685 static char *write_text(char *s, const ap_text *t)
687 for (; t; t = t->next) {
688 size_t len = strlen(t->text);
689 memcpy(s, t->text, len);
/*
 * write_elem: serialize `elem` into the buffer at `s` in the given style.
 *
 * The buffer is written with unbounded sprintf()/memcpy() calls, so its
 * size must come from elem_size() called with the same arguments; each
 * format string here has a matching byte count there.
 *
 * elem       - element to write; children are written recursively
 * style      - AP_XML_X2T_FULL, AP_XML_X2T_FULL_NS_LANG or
 *              AP_XML_X2T_LANG_INNER are handled explicitly
 * namespaces - URI table; only dereferenced for AP_XML_X2T_FULL_NS_LANG
 * ns_map     - optional remapping of namespace indices (may be NULL)
 *
 * NOTE(review): the pointer advances after each write and the return
 * value are not visible in this listing; the recursive calls below use
 * the result as the next write position.
 */
695 static char *write_elem(char *s, const ap_xml_elem *elem, int style,
696 apr_array_header_t *namespaces, int *ns_map)
698 const ap_xml_elem *child;
702 if (style == AP_XML_X2T_FULL || style == AP_XML_X2T_FULL_NS_LANG) {
703 int empty = AP_XML_ELEM_IS_EMPTY(elem);
704 const ap_xml_attr *attr;
706 if (elem->ns == AP_XML_NS_NONE) {
707 len = sprintf(s, "<%s", elem->name);
710 ns = ns_map ? ns_map[elem->ns] : elem->ns;
711 len = sprintf(s, "<ns%d:%s", ns, elem->name);
715 for (attr = elem->attr; attr; attr = attr->next) {
716 if (attr->ns == AP_XML_NS_NONE)
717 len = sprintf(s, " %s=\"%s\"", attr->name, attr->value);
/* NOTE(review): attr->ns is printed without going through ns_map, unlike
   the element prefix above; elem_size() counts it the same way */
719 len = sprintf(s, " ns%d:%s=\"%s\"", attr->ns, attr->name, attr->value);
723 /* add the xml:lang value if necessary */
724 if (elem->lang != NULL &&
725 (style == AP_XML_X2T_FULL_NS_LANG ||
726 elem->parent == NULL ||
727 elem->lang != elem->parent->lang)) {
728 len = sprintf(s, " xml:lang=\"%s\"", elem->lang);
732 /* add namespace definitions, if required */
733 if (style == AP_XML_X2T_FULL_NS_LANG) {
/* one declaration per table entry, from the highest index down to 0 */
736 for (i = namespaces->nelts; i--;) {
737 len = sprintf(s, " xmlns:ns%d=\"%s\"", i,
738 AP_XML_GET_URI_ITEM(namespaces, i));
743 /* no more to do. close it up and go. */
753 else if (style == AP_XML_X2T_LANG_INNER) {
754 /* prepend the xml:lang value */
755 if (elem->lang != NULL) {
756 len = strlen(elem->lang);
757 memcpy(s, elem->lang, len);
763 s = write_text(s, elem->first_cdata.first);
/* children are always written in AP_XML_X2T_FULL style, each followed by its trailing text */
765 for (child = elem->first_child; child; child = child->next) {
766 s = write_elem(s, child, AP_XML_X2T_FULL, NULL, ns_map);
767 s = write_text(s, child->following_cdata.first);
770 if (style == AP_XML_X2T_FULL || style == AP_XML_X2T_FULL_NS_LANG) {
771 if (elem->ns == AP_XML_NS_NONE) {
772 len = sprintf(s, "</%s>", elem->name);
775 ns = ns_map ? ns_map[elem->ns] : elem->ns;
776 len = sprintf(s, "</ns%d:%s>", ns, elem->name);
/*
 * ap_xml_quote_elem: replace, in place, every text string reachable from
 * `elem` with its quoted form from ap_xml_quote_string().
 *
 * p    - pool passed through to ap_xml_quote_string()
 * elem - root of the subtree to convert
 *
 * Character data (first_cdata and following_cdata) is quoted with
 * quotes=0; attribute values are quoted with quotes=1; child elements are
 * handled by recursion.
 */
784 API_EXPORT(void) ap_xml_quote_elem(apr_pool_t *p, ap_xml_elem *elem)
787 ap_xml_attr *scan_attr;
788 ap_xml_elem *scan_elem;
790 /* convert the element's text */
791 for (scan_txt = elem->first_cdata.first;
793 scan_txt = scan_txt->next) {
794 scan_txt->text = ap_xml_quote_string(p, scan_txt->text, 0);
796 for (scan_txt = elem->following_cdata.first;
798 scan_txt = scan_txt->next) {
799 scan_txt->text = ap_xml_quote_string(p, scan_txt->text, 0);
802 /* convert the attribute values */
803 for (scan_attr = elem->attr;
805 scan_attr = scan_attr->next) {
806 scan_attr->value = ap_xml_quote_string(p, scan_attr->value, 1);
809 /* convert the child elements */
810 for (scan_elem = elem->first_child;
812 scan_elem = scan_elem->next) {
813 ap_xml_quote_elem(p, scan_elem);
817 /* convert an element to a text string */
/*
 * The buffer is allocated from `p` with the size reported by elem_size()
 * plus one byte, then filled by write_elem() using the same style,
 * namespaces and ns_map arguments.
 * NOTE(review): the stores through `pbuf` and `psize` are not visible in
 * this listing -- confirm what each receives (and whether either may be
 * NULL) before relying on them.
 */
818 API_EXPORT(void) ap_xml_to_text(apr_pool_t * p, const ap_xml_elem *elem,
819 int style, apr_array_header_t *namespaces,
820 int *ns_map, const char **pbuf, size_t *psize)
822 /* get the exact size, plus a null terminator */
823 size_t size = elem_size(elem, style, namespaces, ns_map) + 1;
824 char *s = apr_palloc(p, size);
826 (void) write_elem(s, elem, style, namespaces, ns_map);
/*
 * ap_xml_empty_elem: format `elem` as a self-closing tag followed by
 * DEBUG_CR, in a string allocated from `p`.
 *
 * Returns "<name/>" when the element has no namespace, otherwise
 * "<nsN:name/>" where N is elem->ns. Attributes and content are not
 * written.
 */
834 API_EXPORT(const char *) ap_xml_empty_elem(apr_pool_t * p,
835 const ap_xml_elem *elem)
837 if (elem->ns == AP_XML_NS_NONE) {
839 * The prefix (xml...) is already within the prop name, or
840 * the element simply has no prefix.
842 return apr_psprintf(p, "<%s/>" DEBUG_CR, elem->name);
845 return apr_psprintf(p, "<ns%d:%s/>" DEBUG_CR, elem->ns, elem->name);
848 /* return the URI's (existing) index, or insert it and return a new index */
/*
 * uri_array - array whose elements are URI string pointers
 *
 * The array is searched from the last element to the first with strcmp().
 * When nothing matches, the pointer itself (not a copy) is pushed and the
 * index of the new last element is returned, so `uri` must outlive the
 * array.
 * NOTE(review): the second parameter and the return taken on a match are
 * not visible in this listing.
 */
849 API_EXPORT(int) ap_xml_insert_uri(apr_array_header_t *uri_array,
855 for (i = uri_array->nelts; i--;) {
856 if (strcmp(uri, AP_XML_GET_URI_ITEM(uri_array, i)) == 0)
860 pelt = apr_push_array(uri_array);
861 *pelt = uri; /* assume uri is const or in a pool */
862 return uri_array->nelts - 1;