2 ** Licensed to the Apache Software Foundation (ASF) under one or more
3 ** contributor license agreements. See the NOTICE file distributed with
4 ** this work for additional information regarding copyright ownership.
5 ** The ASF licenses this file to You under the Apache License, Version 2.0
6 ** (the "License"); you may not use this file except in compliance with
7 ** the License. You may obtain a copy of the License at
9 ** http://www.apache.org/licenses/LICENSE-2.0
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
18 #include "apreq_parser.h"
19 #include "apreq_error.h"
20 #include "apreq_util.h"
21 #include "apr_strings.h"
22 #include "apr_strmatch.h"
25 #define CRLF "\015\012"
28 #define MIN(a, b) (((a) < (b)) ? (a) : (b))
/*
 * Early-exit guard for a parser entry point.  The expansion refers to
 * `ctx` and `bb`, so both must be in scope where the macro is used.
 *
 *  - ctx->status == PREFIX##_ERROR    : the enclosing function returns
 *                                       APREQ_ERROR_GENERAL.
 *  - ctx->status == PREFIX##_COMPLETE : handled by a line that is not
 *                                       visible in this excerpt.
 *  - otherwise, bb == NULL            : the enclosing function returns
 *                                       APR_INCOMPLETE.
 */
30 #define PARSER_STATUS_CHECK(PREFIX) do { \
31 if (ctx->status == PREFIX##_ERROR) \
32 return APREQ_ERROR_GENERAL; \
33 else if (ctx->status == PREFIX##_COMPLETE) \
35 else if (bb == NULL) \
36 return APR_INCOMPLETE; \
39 /* maximum recursion level in the mfd parser */
/* Pending input: the caller's brigade is concatenated onto this one
 * before parsing, and unconsumed data is set aside here between calls. */
44 apr_bucket_brigade *in;
/* Output side of split_on_bdry(): receives the data that precedes a
 * boundary (or a CRLF) in `in`. */
45 apr_bucket_brigade *bb;
/* Parser run over `in` to fill the per-part `info` table. */
46 apreq_parser_t *hdr_parser;
/* Nested parser, created when a part's Content-Type starts with
 * "multipart/"; NULL until then. */
47 apreq_parser_t *next_parser;
/* Precompiled apr_strmatch pattern for the boundary string. */
48 const apr_strmatch_pattern *pattern;
/* Name of the parameter currently being read; reset to NULL once the
 * parameter has been added to the table. */
62 const char *param_name;
/* Parameter used by the upload state: its `upload` brigade is where
 * the part's data is concatenated. */
63 apreq_param_t *upload;
68 /********************* multipart/form-data *********************/
/*
 * Compare the leading bytes of brigade `bb` with `start_string`.
 *
 * The buckets are visited front to back and read with APR_BLOCK_READ.
 * Each bucket is compared against as much of the still-unmatched part of
 * `start_string` as it can cover (MIN of the two lengths); on a match the
 * remaining length and the string pointer are advanced by that amount.
 *
 * Returns APREQ_ERROR_GENERAL as soon as a compared chunk differs, and
 * APR_INCOMPLETE when the loop ends with part of the string still
 * unmatched.  The remaining return paths (EOS bucket, read failure, full
 * match) are on lines that are not visible in this excerpt.
 */
71 static apr_status_t brigade_start_string(apr_bucket_brigade *bb,
72 const char *start_string)
75 apr_size_t slen = strlen(start_string);
77 for (e = APR_BRIGADE_FIRST(bb); e != APR_BRIGADE_SENTINEL(bb);
78 e = APR_BUCKET_NEXT(e))
/* NOTE(review): bytes_to_check holds MIN(slen, blen), i.e. a byte count,
 * yet it is declared apr_status_t here -- consider apr_size_t. */
81 apr_status_t s, bytes_to_check;
87 if (APR_BUCKET_IS_EOS(e))
90 s = apr_bucket_read(e, &buf, &blen, APR_BLOCK_READ);
/* Only compare the bytes this bucket and the remaining string share. */
98 bytes_to_check = MIN(slen,blen);
100 if (strncmp(buf,start_string,bytes_to_check) != 0)
101 return APREQ_ERROR_GENERAL;
/* Matched so far: step past the part of the string already verified. */
103 slen -= bytes_to_check;
104 start_string += bytes_to_check;
107 /* slen > 0, so brigade isn't large enough yet */
108 return APR_INCOMPLETE;
/*
 * Move data from the front of brigade `in` to the tail of brigade `out`
 * until the boundary string `bdry` is found.  (`bdry` is declared on a
 * parameter line that is not visible in this excerpt; it is the string
 * measured by strlen(bdry) below.)
 *
 * Visible behaviour:
 *  - Buckets of `in` are read one at a time with APR_BLOCK_READ.
 *  - At `look_for_boundary_up_front` the current bucket is compared with
 *    the boundary starting at offset `off`.  If the bucket covers the rest
 *    of the boundary, it is split right after the match and every bucket
 *    from the front of `in` up to that point is deleted.
 *  - If that front comparison fails, the buckets ahead of the current one
 *    are moved from `in` to `out` and the comparison is retried.
 *  - Otherwise the bucket is searched for the boundary: with apr_strmatch()
 *    when `pattern` is non-NULL and the bucket holds at least strlen(bdry)
 *    bytes, else with apreq_index(..., APREQ_MATCH_PARTIAL).  The bucket is
 *    split at the index found and the leading piece is moved to `out`.
 *
 * Returns APR_INCOMPLETE when `in` is exhausted without a complete
 * boundary.  Callers compare the result with APR_SUCCESS, so the
 * full-match path presumably returns that, but the statement itself is
 * not visible here; the same holds for how `off` is advanced.
 */
112 static apr_status_t split_on_bdry(apr_bucket_brigade *out,
113 apr_bucket_brigade *in,
114 const apr_strmatch_pattern *pattern,
117 apr_bucket *e = APR_BRIGADE_FIRST(in);
118 apr_size_t blen = strlen(bdry), off = 0;
120 while ( e != APR_BRIGADE_SENTINEL(in) ) {
126 if (APR_BUCKET_IS_EOS(e))
129 s = apr_bucket_read(e, &buf, &len, APR_BLOCK_READ);
130 if (s != APR_SUCCESS)
135 e = APR_BUCKET_NEXT(e);
136 apr_bucket_delete(f);
140 look_for_boundary_up_front:
141 if (strncmp(bdry + off, buf, MIN(len, blen - off)) == 0) {
142 if ( len >= blen - off ) {
144 if (len > blen - off)
145 apr_bucket_split(e, blen - off);
146 e = APR_BUCKET_NEXT(e);
149 apr_bucket *f = APR_BRIGADE_FIRST(in);
150 apr_bucket_delete(f);
151 } while (APR_BRIGADE_FIRST(in) != e);
157 e = APR_BUCKET_NEXT(e);
161 /* prior (partial) strncmp failed,
162 * so we can move previous buckets across
163 * and retest buf against the full bdry.
166 /* give hints to GCC by making the brigade volatile, otherwise the
167 * loop below will end up being endless. See:
168 * https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=193740
170 apr_bucket_brigade * volatile in_v = in;
173 apr_bucket *f = APR_BRIGADE_FIRST(in_v);
174 APR_BUCKET_REMOVE(f);
175 APR_BRIGADE_INSERT_TAIL(out, f);
176 } while (e != APR_BRIGADE_FIRST(in_v));
178 goto look_for_boundary_up_front;
181 if (pattern != NULL && len >= blen) {
182 const char *match = apr_strmatch(pattern, buf, len);
186 idx = apreq_index(buf + len-blen, blen, bdry, blen,
187 APREQ_MATCH_PARTIAL);
193 idx = apreq_index(buf, len, bdry, blen, APREQ_MATCH_PARTIAL);
195 /* Theoretically idx should never be 0 here, because we
196 * already tested the front of the brigade for a potential match.
197 * However, it doesn't hurt to allow for the possibility,
198 * since this will just start the whole loop over again.
201 apr_bucket_split(e, idx);
203 APR_BUCKET_REMOVE(e);
204 APR_BRIGADE_INSERT_TAIL(out, e);
205 e = APR_BRIGADE_FIRST(in);
208 return APR_INCOMPLETE;
/*
 * Build a fresh multipart parsing context from a Content-Type header value.
 *
 * The context is allocated from `pool`.  A private copy of `content_type`
 * is scanned for its first ';' and the "boundary" attribute is extracted
 * from what follows into ctx->bdry.  On success the context starts in
 * state MFD_INIT with a precompiled boundary pattern, a header parser, two
 * empty brigades (`bb`, `in`), an EOS bucket, and NULL next_parser /
 * param_name.
 *
 * Returns NULL when the header has no ';' or no boundary attribute.
 * Some parameters and field assignments are on lines that are not visible
 * in this excerpt.
 */
213 struct mfd_ctx * create_multipart_context(const char *content_type,
215 apr_bucket_alloc_t *ba,
216 apr_size_t brigade_limit,
217 const char *temp_dir,
/* apr_palloc() does not zero the block, so every field has to be
 * assigned explicitly below. */
223 struct mfd_ctx *ctx = apr_palloc(pool, sizeof *ctx);
224 char *ct = apr_pstrdup(pool, content_type);
/* Attributes follow the first ';' of the header value. */
226 ct = strchr(ct, ';');
228 return NULL; /* missing semicolon */
231 s = apreq_header_attribute(ct, "boundary", 8,
232 (const char **)&ctx->bdry, &blen);
234 if (s != APR_SUCCESS)
235 return NULL; /* missing boundary */
244 ctx->status = MFD_INIT;
/* Third argument 1 requests a case-sensitive pattern. */
245 ctx->pattern = apr_strmatch_precompile(pool, ctx->bdry, 1);
246 ctx->hdr_parser = apreq_parser_make(pool, ba, "",
249 temp_dir, NULL, NULL);
251 ctx->bb = apr_brigade_create(pool, ba);
252 ctx->in = apr_brigade_create(pool, ba);
253 ctx->eos = apr_bucket_eos_create(ba);
254 ctx->next_parser = NULL;
255 ctx->param_name = NULL;
/*
 * multipart/form-data parser entry point.
 *
 * `parser`, `t` and `bb` come from the APREQ_DECLARE_PARSER signature,
 * which is not visible in this excerpt: `parser` carries the pool, bucket
 * allocator, limits, hook and the saved context; `t` is the table that
 * parsed parameters are added to; `bb` is the caller's input brigade.
 *
 * The parsing context is taken from parser->ctx (built with
 * create_multipart_context() when needed).  After PARSER_STATUS_CHECK the
 * caller's brigade is appended to ctx->in and a state machine keyed on
 * ctx->status is run.  The states referenced below are MFD_INIT,
 * MFD_NEXTLINE, MFD_HEADER, MFD_POST_HEADER, MFD_PARAM, MFD_UPLOAD,
 * MFD_MIXED, MFD_COMPLETE and MFD_ERROR; a state change is followed by
 * `goto mfd_parse_brigade` to re-dispatch.
 *
 * Returns APR_INCOMPLETE when more input is required (unconsumed data is
 * set aside in the context first) and APREQ_ERROR_GENERAL on failure.
 */
262 APREQ_DECLARE_PARSER(apreq_parse_multipart)
264 apr_pool_t *pool = parser->pool;
265 apr_bucket_alloc_t *ba = parser->bucket_alloc;
266 struct mfd_ctx *ctx = parser->ctx;
270 ctx = create_multipart_context(parser->content_type,
272 parser->brigade_limit,
273 parser->temp_dir, 1);
275 return APREQ_ERROR_GENERAL;
281 PARSER_STATUS_CHECK(MFD);
/* Queue the caller's data behind whatever input is still pending. */
282 APR_BRIGADE_CONCAT(ctx->in, bb);
286 switch (ctx->status) {
/* Find the opening boundary.  The NULL pattern makes split_on_bdry() skip
 * its apr_strmatch() path; ctx->bdry + 2 skips the first two bytes of the
 * stored boundary (presumably its leading CRLF -- confirm). */
290 s = split_on_bdry(ctx->bb, ctx->in, NULL, ctx->bdry + 2);
291 if (s != APR_SUCCESS) {
292 apreq_brigade_setaside(ctx->in, pool);
293 apreq_brigade_setaside(ctx->bb, pool);
296 ctx->status = MFD_NEXTLINE;
297 /* Be polite and return any preamble text to the caller. */
298 APR_BRIGADE_CONCAT(bb, ctx->bb);
/* Consume the remainder of the boundary line, up to the next CRLF. */
305 s = split_on_bdry(ctx->bb, ctx->in, NULL, CRLF);
307 ctx->status = MFD_COMPLETE;
310 if (s != APR_SUCCESS) {
311 apreq_brigade_setaside(ctx->in, pool);
312 apreq_brigade_setaside(ctx->bb, pool);
315 if (!APR_BRIGADE_EMPTY(ctx->bb)) {
318 apr_brigade_pflatten(ctx->bb, &line, &len, pool);
/* Text starting with "--" after the boundary ends the parse: the rest of
 * the input is handed back to the caller and the parser is marked
 * complete. */
320 if (len >= 2 && strncmp(line, "--", 2) == 0) {
321 APR_BRIGADE_CONCAT(bb, ctx->in);
322 ctx->status = MFD_COMPLETE;
325 apr_brigade_cleanup(ctx->bb);
328 ctx->status = MFD_HEADER;
335 if (ctx->info == NULL) {
336 ctx->info = apr_table_make(pool, APREQ_DEFAULT_NELTS);
337 /* flush out header parser internal structs for reuse */
338 ctx->hdr_parser->ctx = NULL;
340 s = apreq_parser_run(ctx->hdr_parser, ctx->info, ctx->in);
343 ctx->status = MFD_POST_HEADER;
346 apreq_brigade_setaside(ctx->in, pool);
347 return APR_INCOMPLETE;
349 ctx->status = MFD_ERROR;
355 case MFD_POST_HEADER:
357 /* Must handle special case of missing CRLF (mainly
358 * coming from empty file uploads). See RFC 2046 S5.1.1:
360 * body-part = MIME-part-header [CRLF *OCTET]
362 * So the CRLF we already matched in MFD_HEADER may have been
363 * part of the boundary string! Both Konqueror (v??) and
364 * Mozilla-0.97 are known to emit such blocks.
366 * Here we first check for this condition with
367 * brigade_start_string, and prefix the brigade with
368 * an additional CRLF bucket if necessary.
371 const char *cd, *ct, *name, *filename;
372 apr_size_t nlen, flen;
375 switch (brigade_start_string(ctx->in, ctx->bdry + 2)) {
378 apreq_brigade_setaside(ctx->in, pool);
379 return APR_INCOMPLETE;
382 /* part has no body- return CRLF to front */
383 e = apr_bucket_immortal_create(CRLF, 2,
384 ctx->bb->bucket_alloc);
385 APR_BRIGADE_INSERT_HEAD(ctx->in, e);
392 cd = apr_table_get(ctx->info, "Content-Disposition");
394 /* First check to see if must descend into a new multipart
395 * block. If we do, create a new parser and pass control
399 ct = apr_table_get(ctx->info, "Content-Type");
401 if (ct != NULL && strncmp(ct, "multipart/", 10) == 0) {
402 struct mfd_ctx *next_ctx;
404 if (ctx->level >= MAX_LEVEL) {
405 ctx->status = MFD_ERROR;
406 goto mfd_parse_brigade;
409 next_ctx = create_multipart_context(ct, pool, ba,
410 parser->brigade_limit,
414 next_ctx->param_name = "";
417 s = apreq_header_attribute(cd, "name", 4,
419 if (s == APR_SUCCESS) {
421 = apr_pstrmemdup(pool, name, nlen);
424 const char *cid = apr_table_get(ctx->info,
427 next_ctx->param_name = apr_pstrdup(pool, cid);
432 ctx->next_parser = apreq_parser_make(pool, ba, ct,
433 apreq_parse_multipart,
434 parser->brigade_limit,
438 ctx->status = MFD_MIXED;
439 goto mfd_parse_brigade;
443 /* Look for a normal form-data part. */
445 if (cd != NULL && strncmp(cd, "form-data", 9) == 0) {
446 s = apreq_header_attribute(cd, "name", 4, &name, &nlen);
447 if (s != APR_SUCCESS) {
448 ctx->status = MFD_ERROR;
449 goto mfd_parse_brigade;
452 s = apreq_header_attribute(cd, "filename",
453 8, &filename, &flen);
/* A filename attribute makes this part an upload rather than a plain
 * parameter. */
454 if (s == APR_SUCCESS) {
455 apreq_param_t *param;
457 param = apreq_param_make(pool, name, nlen,
459 apreq_param_tainted_on(param);
460 param->info = ctx->info;
462 = apr_brigade_create(pool, ctx->bb->bucket_alloc);
464 ctx->status = MFD_UPLOAD;
465 goto mfd_parse_brigade;
468 ctx->param_name = apr_pstrmemdup(pool, name, nlen);
469 ctx->status = MFD_PARAM;
474 /* else check for a file part in a multipart section */
475 else if (cd != NULL && strncmp(cd, "file", 4) == 0) {
476 apreq_param_t *param;
478 s = apreq_header_attribute(cd, "filename",
479 8, &filename, &flen);
480 if (s != APR_SUCCESS || ctx->param_name == NULL) {
481 ctx->status = MFD_ERROR;
482 goto mfd_parse_brigade;
484 name = ctx->param_name;
486 param = apreq_param_make(pool, name, nlen,
488 apreq_param_tainted_on(param);
489 param->info = ctx->info;
490 param->upload = apr_brigade_create(pool,
491 ctx->bb->bucket_alloc);
493 ctx->status = MFD_UPLOAD;
494 goto mfd_parse_brigade;
497 /* otherwise look for Content-ID in multipart/mixed case */
499 const char *cid = apr_table_get(ctx->info, "Content-ID");
500 apreq_param_t *param;
513 param = apreq_param_make(pool, name, nlen,
515 apreq_param_tainted_on(param);
516 param->info = ctx->info;
517 param->upload = apr_brigade_create(pool,
518 ctx->bb->bucket_alloc);
520 ctx->status = MFD_UPLOAD;
521 goto mfd_parse_brigade;
528 apreq_param_t *param;
/* Plain parameter: its value is everything up to the next boundary. */
533 s = split_on_bdry(ctx->bb, ctx->in, ctx->pattern, ctx->bdry);
538 apreq_brigade_setaside(ctx->in, pool);
539 apreq_brigade_setaside(ctx->bb, pool);
543 s = apr_brigade_length(ctx->bb, 1, &off);
544 if (s != APR_SUCCESS) {
545 ctx->status = MFD_ERROR;
549 param = apreq_param_make(pool, ctx->param_name,
550 strlen(ctx->param_name),
552 apreq_param_tainted_on(param);
553 param->info = ctx->info;
/* Point v at the value embedded in the new parameter, then flatten the
 * collected brigade into its data buffer. */
555 *(const apreq_value_t **)&v = &param->v;
556 apr_brigade_flatten(ctx->bb, v->data, &len);
559 if (parser->hook != NULL) {
560 s = apreq_hook_run(parser->hook, param, NULL);
561 if (s != APR_SUCCESS) {
562 ctx->status = MFD_ERROR;
567 apreq_param_charset_set(param,
568 apreq_charset_divine(v->data, len));
569 apreq_value_table_add(v, t);
570 ctx->status = MFD_NEXTLINE;
571 ctx->param_name = NULL;
572 apr_brigade_cleanup(ctx->bb);
573 goto mfd_parse_brigade;
576 ctx->status = MFD_ERROR;
582 break; /* not reached */
586 apreq_param_t *param = ctx->upload;
/* Upload: move data up to the next boundary into ctx->bb, give the hook
 * a look at it, then append it to the parameter's upload brigade. */
588 s = split_on_bdry(ctx->bb, ctx->in, ctx->pattern, ctx->bdry);
592 if (parser->hook != NULL) {
593 s = apreq_hook_run(parser->hook, param, ctx->bb);
594 if (s != APR_SUCCESS) {
595 ctx->status = MFD_ERROR;
599 apreq_brigade_setaside(ctx->bb, pool);
600 apreq_brigade_setaside(ctx->in, pool);
601 s = apreq_brigade_concat(pool, parser->temp_dir,
602 parser->brigade_limit,
603 param->upload, ctx->bb);
604 return (s == APR_SUCCESS) ? APR_INCOMPLETE : s;
/* Final chunk: the hook is run with the context's EOS bucket appended,
 * and the bucket is removed from the brigade again afterwards. */
607 if (parser->hook != NULL) {
608 APR_BRIGADE_INSERT_TAIL(ctx->bb, ctx->eos);
609 s = apreq_hook_run(parser->hook, param, ctx->bb);
610 APR_BUCKET_REMOVE(ctx->eos);
611 if (s != APR_SUCCESS) {
612 ctx->status = MFD_ERROR;
616 apreq_value_table_add(&param->v, t);
617 apreq_brigade_setaside(ctx->bb, pool);
618 s = apreq_brigade_concat(pool, parser->temp_dir,
619 parser->brigade_limit,
620 param->upload, ctx->bb);
622 if (s != APR_SUCCESS)
625 ctx->status = MFD_NEXTLINE;
626 goto mfd_parse_brigade;
629 ctx->status = MFD_ERROR;
634 break; /* not reached */
/* Nested multipart section: hand the pending input to the sub-parser. */
639 s = apreq_parser_run(ctx->next_parser, t, ctx->in);
642 ctx->status = MFD_INIT;
643 ctx->param_name = NULL;
644 goto mfd_parse_brigade;
646 APR_BRIGADE_CONCAT(bb, ctx->in);
647 return APR_INCOMPLETE;
649 ctx->status = MFD_ERROR;
654 break; /* not reached */
657 return APREQ_ERROR_GENERAL;
660 return APR_INCOMPLETE;