granicus.if.org Git - apache/blob - server/util.c

   1 /* Licensed to the Apache Software Foundation (ASF) under one or more
   2  * contributor license agreements.  See the NOTICE file distributed with
   3  * this work for additional information regarding copyright ownership.
   4  * The ASF licenses this file to You under the Apache License, Version 2.0
   5  * (the "License"); you may not use this file except in compliance with
   6  * the License.  You may obtain a copy of the License at
   7  *
   8  *     http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 /*
  18  * util.c: string utility things
  19  *
  20  * 3/21/93 Rob McCool
  21  * 1995-96 Many changes by the Apache Software Foundation
  22  *
  23  */
  24
  25 /* Debugging aid:
  26  * #define DEBUG            to trace all cfg_open*()/cfg_closefile() calls
  27  * #define DEBUG_CFG_LINES  to trace every line read from the config files
  28  */
  29
  30 #include "apr.h"
  31 #include "apr_strings.h"
  32 #include "apr_lib.h"
  33
  34 #define APR_WANT_STDIO
  35 #define APR_WANT_STRFUNC
  36 #include "apr_want.h"
  37
  38 #if APR_HAVE_UNISTD_H
  39 #include <unistd.h>
  40 #endif
  41 #if APR_HAVE_PROCESS_H
  42 #include <process.h>            /* for getpid() on Win32 */
  43 #endif
  44 #if APR_HAVE_NETDB_H
  45 #include <netdb.h>              /* for gethostbyname() */
  46 #endif
  47
  48 #include "ap_config.h"
  49 #include "apr_base64.h"
  50 #include "httpd.h"
  51 #include "http_main.h"
  52 #include "http_log.h"
  53 #include "http_protocol.h"
  54 #include "http_config.h"
  55 #include "util_ebcdic.h"
  56
  57 #ifdef HAVE_PWD_H
  58 #include <pwd.h>
  59 #endif
  60 #ifdef HAVE_GRP_H
  61 #include <grp.h>
  62 #endif
  63
  64 /* A bunch of functions in util.c scan strings looking for certain characters.
  65  * To make that more efficient we encode a lookup table.  The test_char_table
  66  * is generated automatically by gen_test_char.c.
  67  */
  68 #include "test_char.h"
  69
  70 /* we assume the folks using this ensure 0 <= c < 256... which means
  71  * you need a cast to (unsigned char) first, you can't just plug a
  72  * char in here and get it to work, because if char is signed then it
  73  * will first be sign extended.
  74  */
  75 #define TEST_CHAR(c, f)        (test_char_table[(unsigned)(c)] & (f))
  76
  77 /* Win32/NetWare/OS2 need to check for both forward and back slashes
  78  * in ap_getparents() and ap_escape_url.
  79  */
  80 #ifdef CASE_BLIND_FILESYSTEM
  81 #define IS_SLASH(s) ((s == '/') || (s == '\\'))
  82 #define SLASHES "/\\"
  83 #else
  84 #define IS_SLASH(s) (s == '/')
  85 #define SLASHES "/"
  86 #endif
  87
  88 APLOG_USE_MODULE(core);
  89
  90
  91 /*
  92  * Examine a field value (such as a media-/content-type) string and return
  93  * it sans any parameters; e.g., strip off any ';charset=foo' and the like.
  94  */
  95 AP_DECLARE(char *) ap_field_noparam(apr_pool_t *p, const char *intype)
  96 {
  97     const char *semi;
  98
  99     if (intype == NULL) return NULL;
 100
 101     semi = ap_strchr_c(intype, ';');
 102     if (semi == NULL) {
 103         return apr_pstrdup(p, intype);
 104     }
 105     else {
 106         while ((semi > intype) && apr_isspace(semi[-1])) {
 107             semi--;
 108         }
 109         return apr_pstrndup(p, intype, semi - intype);
 110     }
 111 }
 112
 113 AP_DECLARE(char *) ap_ht_time(apr_pool_t *p, apr_time_t t, const char *fmt,
 114                               int gmt)
 115 {
 116     apr_size_t retcode;
 117     char ts[MAX_STRING_LEN];
 118     char tf[MAX_STRING_LEN];
 119     apr_time_exp_t xt;
 120
 121     if (gmt) {
 122         const char *f;
 123         char *strp;
 124
 125         apr_time_exp_gmt(&xt, t);
 126         /* Convert %Z to "GMT" and %z to "+0000";
 127          * on hosts that do not have a time zone string in struct tm,
 128          * strftime must assume its argument is local time.
 129          */
 130         for(strp = tf, f = fmt; strp < tf + sizeof(tf) - 6 && (*strp = *f)
 131             ; f++, strp++) {
 132             if (*f != '%') continue;
 133             switch (f[1]) {
 134             case '%':
 135                 *++strp = *++f;
 136                 break;
 137             case 'Z':
 138                 *strp++ = 'G';
 139                 *strp++ = 'M';
 140                 *strp = 'T';
 141                 f++;
 142                 break;
 143             case 'z': /* common extension */
 144                 *strp++ = '+';
 145                 *strp++ = '0';
 146                 *strp++ = '0';
 147                 *strp++ = '0';
 148                 *strp = '0';
 149                 f++;
 150                 break;
 151             }
 152         }
 153         *strp = '\0';
 154         fmt = tf;
 155     }
 156     else {
 157         apr_time_exp_lt(&xt, t);
 158     }
 159
 160     /* check return code? */
 161     apr_strftime(ts, &retcode, MAX_STRING_LEN, fmt, &xt);
 162     ts[MAX_STRING_LEN - 1] = '\0';
 163     return apr_pstrdup(p, ts);
 164 }
 165
 166 /* Roy owes Rob beer. */
 167 /* Rob owes Roy dinner. */
 168
 169 /* These legacy comments would make a lot more sense if Roy hadn't
 170  * replaced the old later_than() routine with util_date.c.
 171  *
 172  * Well, okay, they still wouldn't make any sense.
 173  */
 174
 175 /* Match = 0, NoMatch = 1, Abort = -1
 176  * Based loosely on sections of wildmat.c by Rich Salz
 177  * Hmmm... shouldn't this really go component by component?
 178  */
 179 AP_DECLARE(int) ap_strcmp_match(const char *str, const char *expected)
 180 {
 181     int x, y;
 182
 183     for (x = 0, y = 0; expected[y]; ++y, ++x) {
 184         if ((!str[x]) && (expected[y] != '*'))
 185             return -1;
 186         if (expected[y] == '*') {
 187             while (expected[++y] == '*');
 188             if (!expected[y])
 189                 return 0;
 190             while (str[x]) {
 191                 int ret;
 192                 if ((ret = ap_strcmp_match(&str[x++], &expected[y])) != 1)
 193                     return ret;
 194             }
 195             return -1;
 196         }
 197         else if ((expected[y] != '?') && (str[x] != expected[y]))
 198             return 1;
 199     }
 200     return (str[x] != '\0');
 201 }
 202
 203 AP_DECLARE(int) ap_strcasecmp_match(const char *str, const char *expected)
 204 {
 205     int x, y;
 206
 207     for (x = 0, y = 0; expected[y]; ++y, ++x) {
 208         if (!str[x] && expected[y] != '*')
 209             return -1;
 210         if (expected[y] == '*') {
 211             while (expected[++y] == '*');
 212             if (!expected[y])
 213                 return 0;
 214             while (str[x]) {
 215                 int ret;
 216                 if ((ret = ap_strcasecmp_match(&str[x++], &expected[y])) != 1)
 217                     return ret;
 218             }
 219             return -1;
 220         }
 221         else if (expected[y] != '?'
 222                  && apr_tolower(str[x]) != apr_tolower(expected[y]))
 223             return 1;
 224     }
 225     return (str[x] != '\0');
 226 }
 227
 228 /* We actually compare the canonical root to this root, (but we don't
 229  * waste time checking the case), since every use of this function in
 230  * httpd-2.1 tests if the path is 'proper', meaning we've already passed
 231  * it through apr_filepath_merge, or we haven't.
 232  */
 233 AP_DECLARE(int) ap_os_is_path_absolute(apr_pool_t *p, const char *dir)
 234 {
 235     const char *newpath;
 236     const char *ourdir = dir;
 237     if (apr_filepath_root(&newpath, &dir, 0, p) != APR_SUCCESS
 238             || strncmp(newpath, ourdir, strlen(newpath)) != 0) {
 239         return 0;
 240     }
 241     return 1;
 242 }
 243
 244 AP_DECLARE(int) ap_is_matchexp(const char *str)
 245 {
 246     register int x;
 247
 248     for (x = 0; str[x]; x++)
 249         if ((str[x] == '*') || (str[x] == '?'))
 250             return 1;
 251     return 0;
 252 }
 253
 254 /*
 255  * Here's a pool-based interface to the POSIX-esque ap_regcomp().
 256  * Note that we return ap_regex_t instead of being passed one.
 257  * The reason is that if you use an already-used ap_regex_t structure,
 258  * the memory that you've already allocated gets forgotten, and
 259  * regfree() doesn't clear it. So we don't allow it.
 260  */
 261
 262 static apr_status_t regex_cleanup(void *preg)
 263 {
 264     ap_regfree((ap_regex_t *) preg);
 265     return APR_SUCCESS;
 266 }
 267
 268 AP_DECLARE(ap_regex_t *) ap_pregcomp(apr_pool_t *p, const char *pattern,
 269                                      int cflags)
 270 {
 271     ap_regex_t *preg = apr_palloc(p, sizeof *preg);
 272
 273     if (ap_regcomp(preg, pattern, cflags)) {
 274         return NULL;
 275     }
 276
 277     apr_pool_cleanup_register(p, (void *) preg, regex_cleanup,
 278                               apr_pool_cleanup_null);
 279
 280     return preg;
 281 }
 282
 283 AP_DECLARE(void) ap_pregfree(apr_pool_t *p, ap_regex_t *reg)
 284 {
 285     ap_regfree(reg);
 286     apr_pool_cleanup_kill(p, (void *) reg, regex_cleanup);
 287 }
 288
 289 /*
 290  * Similar to standard strstr() but we ignore case in this version.
 291  * Based on the strstr() implementation further below.
 292  */
 293 AP_DECLARE(char *) ap_strcasestr(const char *s1, const char *s2)
 294 {
 295     char *p1, *p2;
 296     if (*s2 == '\0') {
 297         /* an empty s2 */
 298         return((char *)s1);
 299     }
 300     while(1) {
 301         for ( ; (*s1 != '\0') && (apr_tolower(*s1) != apr_tolower(*s2)); s1++);
 302         if (*s1 == '\0') {
 303             return(NULL);
 304         }
 305         /* found first character of s2, see if the rest matches */
 306         p1 = (char *)s1;
 307         p2 = (char *)s2;
 308         for (++p1, ++p2; apr_tolower(*p1) == apr_tolower(*p2); ++p1, ++p2) {
 309             if (*p1 == '\0') {
 310                 /* both strings ended together */
 311                 return((char *)s1);
 312             }
 313         }
 314         if (*p2 == '\0') {
 315             /* second string ended, a match */
 316             break;
 317         }
 318         /* didn't find a match here, try starting at next character in s1 */
 319         s1++;
 320     }
 321     return((char *)s1);
 322 }
 323
 324 /*
 325  * Returns an offsetted pointer in bigstring immediately after
 326  * prefix. Returns bigstring if bigstring doesn't start with
 327  * prefix or if prefix is longer than bigstring while still matching.
 328  * NOTE: pointer returned is relative to bigstring, so we
 329  * can use standard pointer comparisons in the calling function
 330  * (eg: test if ap_stripprefix(a,b) == a)
 331  */
 332 AP_DECLARE(const char *) ap_stripprefix(const char *bigstring,
 333                                         const char *prefix)
 334 {
 335     const char *p1;
 336
 337     if (*prefix == '\0')
 338         return bigstring;
 339
 340     p1 = bigstring;
 341     while (*p1 && *prefix) {
 342         if (*p1++ != *prefix++)
 343             return bigstring;
 344     }
 345     if (*prefix == '\0')
 346         return p1;
 347
 348     /* hit the end of bigstring! */
 349     return bigstring;
 350 }
 351
 352 /* This function substitutes for $0-$9, filling in regular expression
 353  * submatches. Pass it the same nmatch and pmatch arguments that you
 354  * passed ap_regexec(). pmatch should not be greater than the maximum number
 355  * of subexpressions - i.e. one more than the re_nsub member of ap_regex_t.
 356  *
 357  * input should be the string with the $-expressions, source should be the
 358  * string that was matched against.
 359  *
 360  * It returns the substituted string, or NULL on error.
 361  *
 362  * Parts of this code are based on Henry Spencer's regsub(), from his
 363  * AT&T V8 regexp package.
 364  */
 365
 366 AP_DECLARE(char *) ap_pregsub(apr_pool_t *p, const char *input,
 367                               const char *source, size_t nmatch,
 368                               ap_regmatch_t pmatch[])
 369 {
 370     const char *src = input;
 371     char *dest, *dst;
 372     char c;
 373     size_t no;
 374     int len;
 375
 376     if (!source)
 377         return NULL;
 378     if (!nmatch)
 379         return apr_pstrdup(p, src);
 380
 381     /* First pass, find the size */
 382
 383     len = 0;
 384
 385     while ((c = *src++) != '\0') {
 386         if (c == '$' && apr_isdigit(*src))
 387             no = *src++ - '0';
 388         else
 389             no = 10;
 390
 391         if (no > 9) {                /* Ordinary character. */
 392             if (c == '\\' && *src)
 393                 src++;
 394             len++;
 395         }
 396         else if (no < nmatch && pmatch[no].rm_so < pmatch[no].rm_eo) {
 397             len += pmatch[no].rm_eo - pmatch[no].rm_so;
 398         }
 399
 400     }
 401
 402     dest = dst = apr_pcalloc(p, len + 1);
 403
 404     /* Now actually fill in the string */
 405
 406     src = input;
 407
 408     while ((c = *src++) != '\0') {
 409         if (c == '&')
 410             no = 0;
 411         else if (c == '$' && apr_isdigit(*src))
 412             no = *src++ - '0';
 413         else
 414             no = 10;
 415
 416         if (no > 9) {                /* Ordinary character. */
 417             if (c == '\\' && (*src == '$' || *src == '&'))
 418                 c = *src++;
 419             *dst++ = c;
 420         }
 421         else if (no < nmatch && pmatch[no].rm_so < pmatch[no].rm_eo) {
 422             len = pmatch[no].rm_eo - pmatch[no].rm_so;
 423             memcpy(dst, source + pmatch[no].rm_so, len);
 424             dst += len;
 425         }
 426
 427     }
 428     *dst = '\0';
 429
 430     return dest;
 431 }
 432
 433 /*
 434  * Parse .. so we don't compromise security
 435  */
 436 AP_DECLARE(void) ap_getparents(char *name)
 437 {
 438     char *next;
 439     int l, w, first_dot;
 440
 441     /* Four paseses, as per RFC 1808 */
 442     /* a) remove ./ path segments */
 443     for (next = name; *next && (*next != '.'); next++) {
 444     }
 445
 446     l = w = first_dot = next - name;
 447     while (name[l] != '\0') {
 448         if (name[l] == '.' && IS_SLASH(name[l + 1])
 449             && (l == 0 || IS_SLASH(name[l - 1])))
 450             l += 2;
 451         else
 452             name[w++] = name[l++];
 453     }
 454
 455     /* b) remove trailing . path, segment */
 456     if (w == 1 && name[0] == '.')
 457         w--;
 458     else if (w > 1 && name[w - 1] == '.' && IS_SLASH(name[w - 2]))
 459         w--;
 460     name[w] = '\0';
 461
 462     /* c) remove all xx/../ segments. (including leading ../ and /../) */
 463     l = first_dot;
 464
 465     while (name[l] != '\0') {
 466         if (name[l] == '.' && name[l + 1] == '.' && IS_SLASH(name[l + 2])
 467             && (l == 0 || IS_SLASH(name[l - 1]))) {
 468             register int m = l + 3, n;
 469
 470             l = l - 2;
 471             if (l >= 0) {
 472                 while (l >= 0 && !IS_SLASH(name[l]))
 473                     l--;
 474                 l++;
 475             }
 476             else
 477                 l = 0;
 478             n = l;
 479             while ((name[n] = name[m]))
 480                 (++n, ++m);
 481         }
 482         else
 483             ++l;
 484     }
 485
 486     /* d) remove trailing xx/.. segment. */
 487     if (l == 2 && name[0] == '.' && name[1] == '.')
 488         name[0] = '\0';
 489     else if (l > 2 && name[l - 1] == '.' && name[l - 2] == '.'
 490              && IS_SLASH(name[l - 3])) {
 491         l = l - 4;
 492         if (l >= 0) {
 493             while (l >= 0 && !IS_SLASH(name[l]))
 494                 l--;
 495             l++;
 496         }
 497         else
 498             l = 0;
 499         name[l] = '\0';
 500     }
 501 }
 502
 503 AP_DECLARE(void) ap_no2slash(char *name)
 504 {
 505     char *d, *s;
 506
 507     s = d = name;
 508
 509 #ifdef HAVE_UNC_PATHS
 510     /* Check for UNC names.  Leave leading two slashes. */
 511     if (s[0] == '/' && s[1] == '/')
 512         *d++ = *s++;
 513 #endif
 514
 515     while (*s) {
 516         if ((*d++ = *s) == '/') {
 517             do {
 518                 ++s;
 519             } while (*s == '/');
 520         }
 521         else {
 522             ++s;
 523         }
 524     }
 525     *d = '\0';
 526 }
 527
 528
 529 /*
 530  * copy at most n leading directories of s into d
 531  * d should be at least as large as s plus 1 extra byte
 532  * assumes n > 0
 533  * the return value is the ever useful pointer to the trailing \0 of d
 534  *
 535  * MODIFIED FOR HAVE_DRIVE_LETTERS and NETWARE environments,
 536  * so that if n == 0, "/" is returned in d with n == 1
 537  * and s == "e:/test.html", "e:/" is returned in d
 538  * *** See also directory_walk in modules/http/http_request.c
 539
 540  * examples:
 541  *    /a/b, 0  ==> /  (true for all platforms)
 542  *    /a/b, 1  ==> /
 543  *    /a/b, 2  ==> /a/
 544  *    /a/b, 3  ==> /a/b/
 545  *    /a/b, 4  ==> /a/b/
 546  *
 547  *    c:/a/b 0 ==> /
 548  *    c:/a/b 1 ==> c:/
 549  *    c:/a/b 2 ==> c:/a/
 550  *    c:/a/b 3 ==> c:/a/b
 551  *    c:/a/b 4 ==> c:/a/b
 552  */
 553 AP_DECLARE(char *) ap_make_dirstr_prefix(char *d, const char *s, int n)
 554 {
 555     if (n < 1) {
 556         *d = '/';
 557         *++d = '\0';
 558         return (d);
 559     }
 560
 561     for (;;) {
 562         if (*s == '\0' || (*s == '/' && (--n) == 0)) {
 563             *d = '/';
 564             break;
 565         }
 566         *d++ = *s++;
 567     }
 568     *++d = 0;
 569     return (d);
 570 }
 571
 572
 573 /*
 574  * return the parent directory name including trailing / of the file s
 575  */
 576 AP_DECLARE(char *) ap_make_dirstr_parent(apr_pool_t *p, const char *s)
 577 {
 578     const char *last_slash = ap_strrchr_c(s, '/');
 579     char *d;
 580     int l;
 581
 582     if (last_slash == NULL) {
 583         return apr_pstrdup(p, "");
 584     }
 585     l = (last_slash - s) + 1;
 586     d = apr_pstrmemdup(p, s, l);
 587
 588     return (d);
 589 }
 590
 591
 592 AP_DECLARE(int) ap_count_dirs(const char *path)
 593 {
 594     register int x, n;
 595
 596     for (x = 0, n = 0; path[x]; x++)
 597         if (path[x] == '/')
 598             n++;
 599     return n;
 600 }
 601
 602 AP_DECLARE(char *) ap_getword_nc(apr_pool_t *atrans, char **line, char stop)
 603 {
 604     return ap_getword(atrans, (const char **) line, stop);
 605 }
 606
 607 AP_DECLARE(char *) ap_getword(apr_pool_t *atrans, const char **line, char stop)
 608 {
 609     const char *pos = *line;
 610     int len;
 611     char *res;
 612
 613     while ((*pos != stop) && *pos) {
 614         ++pos;
 615     }
 616
 617     len = pos - *line;
 618     res = apr_pstrmemdup(atrans, *line, len);
 619
 620     if (stop) {
 621         while (*pos == stop) {
 622             ++pos;
 623         }
 624     }
 625     *line = pos;
 626
 627     return res;
 628 }
 629
 630 AP_DECLARE(char *) ap_getword_white_nc(apr_pool_t *atrans, char **line)
 631 {
 632     return ap_getword_white(atrans, (const char **) line);
 633 }
 634
 635 AP_DECLARE(char *) ap_getword_white(apr_pool_t *atrans, const char **line)
 636 {
 637     const char *pos = *line;
 638     int len;
 639     char *res;
 640
 641     while (!apr_isspace(*pos) && *pos) {
 642         ++pos;
 643     }
 644
 645     len = pos - *line;
 646     res = apr_pstrmemdup(atrans, *line, len);
 647
 648     while (apr_isspace(*pos)) {
 649         ++pos;
 650     }
 651
 652     *line = pos;
 653
 654     return res;
 655 }
 656
 657 AP_DECLARE(char *) ap_getword_nulls_nc(apr_pool_t *atrans, char **line,
 658                                        char stop)
 659 {
 660     return ap_getword_nulls(atrans, (const char **) line, stop);
 661 }
 662
 663 AP_DECLARE(char *) ap_getword_nulls(apr_pool_t *atrans, const char **line,
 664                                     char stop)
 665 {
 666     const char *pos = ap_strchr_c(*line, stop);
 667     char *res;
 668
 669     if (!pos) {
 670         res = apr_pstrdup(atrans, *line);
 671         *line += strlen(*line);
 672         return res;
 673     }
 674
 675     res = apr_pstrndup(atrans, *line, pos - *line);
 676
 677     ++pos;
 678
 679     *line = pos;
 680
 681     return res;
 682 }
 683
 684 /* Get a word, (new) config-file style --- quoted strings and backslashes
 685  * all honored
 686  */
 687
 688 static char *substring_conf(apr_pool_t *p, const char *start, int len,
 689                             char quote)
 690 {
 691     char *result = apr_palloc(p, len + 2);
 692     char *resp = result;
 693     int i;
 694
 695     for (i = 0; i < len; ++i) {
 696         if (start[i] == '\\' && (start[i + 1] == '\\'
 697                                  || (quote && start[i + 1] == quote)))
 698             *resp++ = start[++i];
 699         else
 700             *resp++ = start[i];
 701     }
 702
 703     *resp++ = '\0';
 704 #if RESOLVE_ENV_PER_TOKEN
 705     return (char *)ap_resolve_env(p,result);
 706 #else
 707     return result;
 708 #endif
 709 }
 710
 711 AP_DECLARE(char *) ap_getword_conf_nc(apr_pool_t *p, char **line)
 712 {
 713     return ap_getword_conf(p, (const char **) line);
 714 }
 715
 716 AP_DECLARE(char *) ap_getword_conf(apr_pool_t *p, const char **line)
 717 {
 718     const char *str = *line, *strend;
 719     char *res;
 720     char quote;
 721
 722     while (*str && apr_isspace(*str))
 723         ++str;
 724
 725     if (!*str) {
 726         *line = str;
 727         return "";
 728     }
 729
 730     if ((quote = *str) == '"' || quote == '\'') {
 731         strend = str + 1;
 732         while (*strend && *strend != quote) {
 733             if (*strend == '\\' && strend[1] &&
 734                 (strend[1] == quote || strend[1] == '\\')) {
 735                 strend += 2;
 736             }
 737             else {
 738                 ++strend;
 739             }
 740         }
 741         res = substring_conf(p, str + 1, strend - str - 1, quote);
 742
 743         if (*strend == quote)
 744             ++strend;
 745     }
 746     else {
 747         strend = str;
 748         while (*strend && !apr_isspace(*strend))
 749             ++strend;
 750
 751         res = substring_conf(p, str, strend - str, 0);
 752     }
 753
 754     while (*strend && apr_isspace(*strend))
 755         ++strend;
 756     *line = strend;
 757     return res;
 758 }
 759
 760 AP_DECLARE(int) ap_cfg_closefile(ap_configfile_t *cfp)
 761 {
 762 #ifdef DEBUG
 763     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL,
 764         "Done with config file %s", cfp->name);
 765 #endif
 766     return (cfp->close == NULL) ? 0 : cfp->close(cfp->param);
 767 }
 768
 769 /* we can't use apr_file_* directly because of linking issues on Windows */
 770 static apr_status_t cfg_close(void *param)
 771 {
 772     return apr_file_close(param);
 773 }
 774
 775 static apr_status_t cfg_getch(char *ch, void *param)
 776 {
 777     return apr_file_getc(ch, param);
 778 }
 779
 780 static apr_status_t cfg_getstr(void *buf, size_t bufsiz, void *param)
 781 {
 782     return apr_file_gets(buf, bufsiz, param);
 783 }
 784
 785 /* Open a ap_configfile_t as FILE, return open ap_configfile_t struct pointer */
 786 AP_DECLARE(apr_status_t) ap_pcfg_openfile(ap_configfile_t **ret_cfg,
 787                                           apr_pool_t *p, const char *name)
 788 {
 789     ap_configfile_t *new_cfg;
 790     apr_file_t *file = NULL;
 791     apr_finfo_t finfo;
 792     apr_status_t status;
 793 #ifdef DEBUG
 794     char buf[120];
 795 #endif
 796
 797     if (name == NULL) {
 798         ap_log_error(APLOG_MARK, APLOG_ERR, 0, NULL,
 799                "Internal error: pcfg_openfile() called with NULL filename");
 800         return APR_EBADF;
 801     }
 802
 803     status = apr_file_open(&file, name, APR_READ | APR_BUFFERED,
 804                            APR_OS_DEFAULT, p);
 805 #ifdef DEBUG
 806     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL,
 807                 "Opening config file %s (%s)",
 808                 name, (status != APR_SUCCESS) ?
 809                 apr_strerror(status, buf, sizeof(buf)) : "successful");
 810 #endif
 811     if (status != APR_SUCCESS)
 812         return status;
 813
 814     status = apr_file_info_get(&finfo, APR_FINFO_TYPE, file);
 815     if (status != APR_SUCCESS)
 816         return status;
 817
 818     if (finfo.filetype != APR_REG &&
 819 #if defined(WIN32) || defined(OS2) || defined(NETWARE)
 820         strcasecmp(apr_filepath_name_get(name), "nul") != 0) {
 821 #else
 822         strcmp(name, "/dev/null") != 0) {
 823 #endif /* WIN32 || OS2 */
 824         ap_log_error(APLOG_MARK, APLOG_ERR, 0, NULL,
 825                      "Access to file %s denied by server: not a regular file",
 826                      name);
 827         apr_file_close(file);
 828         return APR_EBADF;
 829     }
 830
 831 #ifdef WIN32
 832     /* Some twisted character [no pun intended] at MS decided that a
 833      * zero width joiner as the lead wide character would be ideal for
 834      * describing Unicode text files.  This was further convoluted to
 835      * another MSism that the same character mapped into utf-8, EF BB BF
 836      * would signify utf-8 text files.
 837      *
 838      * Since MS configuration files are all protecting utf-8 encoded
 839      * Unicode path, file and resource names, we already have the correct
 840      * WinNT encoding.  But at least eat the stupid three bytes up front.
 841      */
 842     {
 843         unsigned char buf[4];
 844         apr_size_t len = 3;
 845         status = apr_file_read(file, buf, &len);
 846         if ((status != APR_SUCCESS) || (len < 3)
 847               || memcmp(buf, "\xEF\xBB\xBF", 3) != 0) {
 848             apr_off_t zero = 0;
 849             apr_file_seek(file, APR_SET, &zero);
 850         }
 851     }
 852 #endif
 853
 854     new_cfg = apr_palloc(p, sizeof(*new_cfg));
 855     new_cfg->param = file;
 856     new_cfg->name = apr_pstrdup(p, name);
 857     new_cfg->getch = cfg_getch;
 858     new_cfg->getstr = cfg_getstr;
 859     new_cfg->close = cfg_close;
 860     new_cfg->line_number = 0;
 861     *ret_cfg = new_cfg;
 862     return APR_SUCCESS;
 863 }
 864
 865
 866 /* Allocate a ap_configfile_t handle with user defined functions and params */
 867 AP_DECLARE(ap_configfile_t *) ap_pcfg_open_custom(
 868             apr_pool_t *p, const char *descr, void *param,
 869             apr_status_t (*getc_func) (char *ch, void *param),
 870             apr_status_t (*gets_func) (void *buf, size_t bufsize, void *param),
 871             apr_status_t (*close_func) (void *param))
 872 {
 873     ap_configfile_t *new_cfg = apr_palloc(p, sizeof(*new_cfg));
 874     new_cfg->param = param;
 875     new_cfg->name = descr;
 876     new_cfg->getch = getc_func;
 877     new_cfg->getstr = gets_func;
 878     new_cfg->close = close_func;
 879     new_cfg->line_number = 0;
 880     return new_cfg;
 881 }
 882
 883 /* Read one character from a configfile_t */
 884 AP_DECLARE(apr_status_t) ap_cfg_getc(char *ch, ap_configfile_t *cfp)
 885 {
 886     apr_status_t rc = cfp->getch(ch, cfp->param);
 887     if (rc == APR_SUCCESS && *ch == LF)
 888         ++cfp->line_number;
 889     return rc;
 890 }
 891
 892 AP_DECLARE(const char *) ap_pcfg_strerror(apr_pool_t *p, ap_configfile_t *cfp,
 893                                           apr_status_t rc)
 894 {
 895     char buf[MAX_STRING_LEN];
 896     if (rc == APR_SUCCESS)
 897         return NULL;
 898     return apr_psprintf(p, "Error reading %s at line %d: %s",
 899                         cfp->name, cfp->line_number,
 900                         rc == APR_ENOSPC ? "Line too long"
 901                                          : apr_strerror(rc, buf, sizeof(buf)));
 902 }
 903
 904 /* Read one line from open ap_configfile_t, strip LF, increase line number */
 905 /* If custom handler does not define a getstr() function, read char by char */
 906 AP_DECLARE(apr_status_t) ap_cfg_getline(char *buf, size_t bufsize, ap_configfile_t *cfp)
 907 {
 908     apr_status_t rc;
 909     char *src, *dst;
 910     /* If a "get string" function is defined, use it */
 911     if (cfp->getstr != NULL) {
 912         char *cp;
 913         char *cbuf = buf;
 914         size_t cbufsize = bufsize;
 915
 916         while (1) {
 917             ++cfp->line_number;
 918             rc = cfp->getstr(cbuf, cbufsize, cfp->param);
 919             if (rc == APR_EOF) {
 920                 if (cbuf != buf) {
 921                     *cbuf = '\0';
 922                     break;
 923                 }
 924                 else {
 925                     return APR_EOF;
 926                 }
 927             }
 928             if (rc != APR_SUCCESS) {
 929                 return rc;
 930             }
 931
 932             /*
 933              *  check for line continuation,
 934              *  i.e. match [^\\]\\[\r]\n only
 935              */
 936             cp = cbuf;
 937             cp += strlen(cp);
 938             if (cp > cbuf && cp[-1] == LF) {
 939                 cp--;
 940                 if (cp > cbuf && cp[-1] == CR)
 941                     cp--;
 942                 if (cp > cbuf && cp[-1] == '\\') {
 943                     cp--;
 944                     /*
 945                      * line continuation requested -
 946                      * then remove backslash and continue
 947                      */
 948                     cbufsize -= (cp-cbuf);
 949                     cbuf = cp;
 950                     continue;
 951                 }
 952             }
 953             else if (cp - buf >= bufsize - 1) {
 954                 return APR_ENOSPC;
 955             }
 956             break;
 957         }
 958     } else {
 959         /* No "get string" function defined; read character by character */
 960         size_t i = 0;
 961
 962         if (bufsize < 2) {
 963             /* too small, assume caller is crazy */
 964             return APR_EINVAL;
 965         }
 966         buf[0] = '\0';
 967
 968         while (1) {
 969             char c;
 970             rc = cfp->getch(&c, cfp->param);
 971             if (rc == APR_EOF) {
 972                 if (i > 0)
 973                     break;
 974                 else
 975                     return APR_EOF;
 976             }
 977             if (rc != APR_SUCCESS)
 978                 return rc;
 979             if (c == LF) {
 980                 ++cfp->line_number;
 981                 /* check for line continuation */
 982                 if (i > 0 && buf[i-1] == '\\') {
 983                     i--;
 984                     continue;
 985                 }
 986                 else {
 987                     break;
 988                 }
 989             }
 990             else if (i >= bufsize - 2) {
 991                 return APR_ENOSPC;
 992             }
 993             buf[i] = c;
 994             ++i;
 995         }
 996         buf[i] = '\0';
 997     }
 998
 999     /*
1000      * Leading and trailing white space is eliminated completely
1001      */
1002     src = buf;
1003     while (apr_isspace(*src))
1004         ++src;
1005     /* blast trailing whitespace */
1006     dst = &src[strlen(src)];
1007     while (--dst >= src && apr_isspace(*dst))
1008         *dst = '\0';
1009     /* Zap leading whitespace by shifting */
1010     if (src != buf)
1011         memmove(buf, src, dst - src + 2);
1012
1013 #ifdef DEBUG_CFG_LINES
1014     ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, NULL, "Read config: '%s'", buf);
1015 #endif
1016     return APR_SUCCESS;
1017 }
1018
1019 /* Size an HTTP header field list item, as separated by a comma.
1020  * The return value is a pointer to the beginning of the non-empty list item
1021  * within the original string (or NULL if there is none) and the address
1022  * of field is shifted to the next non-comma, non-whitespace character.
1023  * len is the length of the item excluding any beginning whitespace.
1024  */
1025 AP_DECLARE(const char *) ap_size_list_item(const char **field, int *len)
1026 {
1027     const unsigned char *ptr = (const unsigned char *)*field;
1028     const unsigned char *token;
1029     int in_qpair, in_qstr, in_com;
1030
1031     /* Find first non-comma, non-whitespace byte */
1032
1033     while (*ptr == ',' || apr_isspace(*ptr))
1034         ++ptr;
1035
1036     token = ptr;
1037
1038     /* Find the end of this item, skipping over dead bits */
1039
1040     for (in_qpair = in_qstr = in_com = 0;
1041          *ptr && (in_qpair || in_qstr || in_com || *ptr != ',');
1042          ++ptr) {
1043
1044         if (in_qpair) {
1045             in_qpair = 0;
1046         }
1047         else {
1048             switch (*ptr) {
1049                 case '\\': in_qpair = 1;      /* quoted-pair         */
1050                            break;
1051                 case '"' : if (!in_com)       /* quoted string delim */
1052                                in_qstr = !in_qstr;
1053                            break;
1054                 case '(' : if (!in_qstr)      /* comment (may nest)  */
1055                                ++in_com;
1056                            break;
1057                 case ')' : if (in_com)        /* end comment         */
1058                                --in_com;
1059                            break;
1060                 default  : break;
1061             }
1062         }
1063     }
1064
1065     if ((*len = (ptr - token)) == 0) {
1066         *field = (const char *)ptr;
1067         return NULL;
1068     }
1069
1070     /* Advance field pointer to the next non-comma, non-white byte */
1071
1072     while (*ptr == ',' || apr_isspace(*ptr))
1073         ++ptr;
1074
1075     *field = (const char *)ptr;
1076     return (const char *)token;
1077 }
1078
1079 /* Retrieve an HTTP header field list item, as separated by a comma,
1080  * while stripping insignificant whitespace and lowercasing anything not in
1081  * a quoted string or comment.  The return value is a new string containing
1082  * the converted list item (or NULL if none) and the address pointed to by
1083  * field is shifted to the next non-comma, non-whitespace.
1084  */
1085 AP_DECLARE(char *) ap_get_list_item(apr_pool_t *p, const char **field)
1086 {
1087     const char *tok_start;
1088     const unsigned char *ptr;
1089     unsigned char *pos;
1090     char *token;
1091     int addspace = 0, in_qpair = 0, in_qstr = 0, in_com = 0, tok_len = 0;
1092
1093     /* Find the beginning and maximum length of the list item so that
1094      * we can allocate a buffer for the new string and reset the field.
1095      */
1096     if ((tok_start = ap_size_list_item(field, &tok_len)) == NULL) {
1097         return NULL;
1098     }
1099     token = apr_palloc(p, tok_len + 1);
1100
1101     /* Scan the token again, but this time copy only the good bytes.
1102      * We skip extra whitespace and any whitespace around a '=', '/',
1103      * or ';' and lowercase normal characters not within a comment,
1104      * quoted-string or quoted-pair.
1105      */
1106     for (ptr = (const unsigned char *)tok_start, pos = (unsigned char *)token;
1107          *ptr && (in_qpair || in_qstr || in_com || *ptr != ',');
1108          ++ptr) {
1109
1110         if (in_qpair) {
1111             in_qpair = 0;
1112             *pos++ = *ptr;
1113         }
1114         else {
1115             switch (*ptr) {
1116                 case '\\': in_qpair = 1;
1117                            if (addspace == 1)
1118                                *pos++ = ' ';
1119                            *pos++ = *ptr;
1120                            addspace = 0;
1121                            break;
1122                 case '"' : if (!in_com)
1123                                in_qstr = !in_qstr;
1124                            if (addspace == 1)
1125                                *pos++ = ' ';
1126                            *pos++ = *ptr;
1127                            addspace = 0;
1128                            break;
1129                 case '(' : if (!in_qstr)
1130                                ++in_com;
1131                            if (addspace == 1)
1132                                *pos++ = ' ';
1133                            *pos++ = *ptr;
1134                            addspace = 0;
1135                            break;
1136                 case ')' : if (in_com)
1137                                --in_com;
1138                            *pos++ = *ptr;
1139                            addspace = 0;
1140                            break;
1141                 case ' ' :
1142                 case '\t': if (addspace)
1143                                break;
1144                            if (in_com || in_qstr)
1145                                *pos++ = *ptr;
1146                            else
1147                                addspace = 1;
1148                            break;
1149                 case '=' :
1150                 case '/' :
1151                 case ';' : if (!(in_com || in_qstr))
1152                                addspace = -1;
1153                            *pos++ = *ptr;
1154                            break;
1155                 default  : if (addspace == 1)
1156                                *pos++ = ' ';
1157                            *pos++ = (in_com || in_qstr) ? *ptr
1158                                                         : apr_tolower(*ptr);
1159                            addspace = 0;
1160                            break;
1161             }
1162         }
1163     }
1164     *pos = '\0';
1165
1166     return token;
1167 }
1168
1169 /* Find an item in canonical form (lowercase, no extra spaces) within
1170  * an HTTP field value list.  Returns 1 if found, 0 if not found.
1171  * This would be much more efficient if we stored header fields as
1172  * an array of list items as they are received instead of a plain string.
1173  */
1174 AP_DECLARE(int) ap_find_list_item(apr_pool_t *p, const char *line,
1175                                   const char *tok)
1176 {
1177     const unsigned char *pos;
1178     const unsigned char *ptr = (const unsigned char *)line;
1179     int good = 0, addspace = 0, in_qpair = 0, in_qstr = 0, in_com = 0;
1180
1181     if (!line || !tok)
1182         return 0;
1183
1184     do {  /* loop for each item in line's list */
1185
1186         /* Find first non-comma, non-whitespace byte */
1187
1188         while (*ptr == ',' || apr_isspace(*ptr))
1189             ++ptr;
1190
1191         if (*ptr)
1192             good = 1;  /* until proven otherwise for this item */
1193         else
1194             break;     /* no items left and nothing good found */
1195
1196         /* We skip extra whitespace and any whitespace around a '=', '/',
1197          * or ';' and lowercase normal characters not within a comment,
1198          * quoted-string or quoted-pair.
1199          */
1200         for (pos = (const unsigned char *)tok;
1201              *ptr && (in_qpair || in_qstr || in_com || *ptr != ',');
1202              ++ptr) {
1203
1204             if (in_qpair) {
1205                 in_qpair = 0;
1206                 if (good)
1207                     good = (*pos++ == *ptr);
1208             }
1209             else {
1210                 switch (*ptr) {
1211                     case '\\': in_qpair = 1;
1212                                if (addspace == 1)
1213                                    good = good && (*pos++ == ' ');
1214                                good = good && (*pos++ == *ptr);
1215                                addspace = 0;
1216                                break;
1217                     case '"' : if (!in_com)
1218                                    in_qstr = !in_qstr;
1219                                if (addspace == 1)
1220                                    good = good && (*pos++ == ' ');
1221                                good = good && (*pos++ == *ptr);
1222                                addspace = 0;
1223                                break;
1224                     case '(' : if (!in_qstr)
1225                                    ++in_com;
1226                                if (addspace == 1)
1227                                    good = good && (*pos++ == ' ');
1228                                good = good && (*pos++ == *ptr);
1229                                addspace = 0;
1230                                break;
1231                     case ')' : if (in_com)
1232                                    --in_com;
1233                                good = good && (*pos++ == *ptr);
1234                                addspace = 0;
1235                                break;
1236                     case ' ' :
1237                     case '\t': if (addspace || !good)
1238                                    break;
1239                                if (in_com || in_qstr)
1240                                    good = (*pos++ == *ptr);
1241                                else
1242                                    addspace = 1;
1243                                break;
1244                     case '=' :
1245                     case '/' :
1246                     case ';' : if (!(in_com || in_qstr))
1247                                    addspace = -1;
1248                                good = good && (*pos++ == *ptr);
1249                                break;
1250                     default  : if (!good)
1251                                    break;
1252                                if (addspace == 1)
1253                                    good = (*pos++ == ' ');
1254                                if (in_com || in_qstr)
1255                                    good = good && (*pos++ == *ptr);
1256                                else
1257                                    good = good && (*pos++ == apr_tolower(*ptr));
1258                                addspace = 0;
1259                                break;
1260                 }
1261             }
1262         }
1263         if (good && *pos)
1264             good = 0;          /* not good if only a prefix was matched */
1265
1266     } while (*ptr && !good);
1267
1268     return good;
1269 }
1270
1271
1272 /* Retrieve a token, spacing over it and returning a pointer to
1273  * the first non-white byte afterwards.  Note that these tokens
1274  * are delimited by semis and commas; and can also be delimited
1275  * by whitespace at the caller's option.
1276  */
1277
1278 AP_DECLARE(char *) ap_get_token(apr_pool_t *p, const char **accept_line,
1279                                 int accept_white)
1280 {
1281     const char *ptr = *accept_line;
1282     const char *tok_start;
1283     char *token;
1284     int tok_len;
1285
1286     /* Find first non-white byte */
1287
1288     while (*ptr && apr_isspace(*ptr))
1289         ++ptr;
1290
1291     tok_start = ptr;
1292
1293     /* find token end, skipping over quoted strings.
1294      * (comments are already gone).
1295      */
1296
1297     while (*ptr && (accept_white || !apr_isspace(*ptr))
1298            && *ptr != ';' && *ptr != ',') {
1299         if (*ptr++ == '"')
1300             while (*ptr)
1301                 if (*ptr++ == '"')
1302                     break;
1303     }
1304
1305     tok_len = ptr - tok_start;
1306     token = apr_pstrndup(p, tok_start, tok_len);
1307
1308     /* Advance accept_line pointer to the next non-white byte */
1309
1310     while (*ptr && apr_isspace(*ptr))
1311         ++ptr;
1312
1313     *accept_line = ptr;
1314     return token;
1315 }
1316
1317
1318 /* find http tokens, see the definition of token from RFC2068 */
1319 AP_DECLARE(int) ap_find_token(apr_pool_t *p, const char *line, const char *tok)
1320 {
1321     const unsigned char *start_token;
1322     const unsigned char *s;
1323
1324     if (!line)
1325         return 0;
1326
1327     s = (const unsigned char *)line;
1328     for (;;) {
1329         /* find start of token, skip all stop characters, note NUL
1330          * isn't a token stop, so we don't need to test for it
1331          */
1332         while (TEST_CHAR(*s, T_HTTP_TOKEN_STOP)) {
1333             ++s;
1334         }
1335         if (!*s) {
1336             return 0;
1337         }
1338         start_token = s;
1339         /* find end of the token */
1340         while (*s && !TEST_CHAR(*s, T_HTTP_TOKEN_STOP)) {
1341             ++s;
1342         }
1343         if (!strncasecmp((const char *)start_token, (const char *)tok,
1344                          s - start_token)) {
1345             return 1;
1346         }
1347         if (!*s) {
1348             return 0;
1349         }
1350     }
1351 }
1352
1353
1354 AP_DECLARE(int) ap_find_last_token(apr_pool_t *p, const char *line,
1355                                    const char *tok)
1356 {
1357     int llen, tlen, lidx;
1358
1359     if (!line)
1360         return 0;
1361
1362     llen = strlen(line);
1363     tlen = strlen(tok);
1364     lidx = llen - tlen;
1365
1366     if (lidx < 0 ||
1367         (lidx > 0 && !(apr_isspace(line[lidx - 1]) || line[lidx - 1] == ',')))
1368         return 0;
1369
1370     return (strncasecmp(&line[lidx], tok, tlen) == 0);
1371 }
1372
1373 AP_DECLARE(char *) ap_escape_shell_cmd(apr_pool_t *p, const char *str)
1374 {
1375     char *cmd;
1376     unsigned char *d;
1377     const unsigned char *s;
1378
1379     cmd = apr_palloc(p, 2 * strlen(str) + 1);        /* Be safe */
1380     d = (unsigned char *)cmd;
1381     s = (const unsigned char *)str;
1382     for (; *s; ++s) {
1383
1384 #if defined(OS2) || defined(WIN32)
1385         /*
1386          * Newlines to Win32/OS2 CreateProcess() are ill advised.
1387          * Convert them to spaces since they are effectively white
1388          * space to most applications
1389          */
1390         if (*s == '\r' || *s == '\n') {
1391              *d++ = ' ';
1392              continue;
1393          }
1394 #endif
1395
1396         if (TEST_CHAR(*s, T_ESCAPE_SHELL_CMD)) {
1397             *d++ = '\\';
1398         }
1399         *d++ = *s;
1400     }
1401     *d = '\0';
1402
1403     return cmd;
1404 }
1405
/* Convert the two hexadecimal digits at what[0] and what[1] into the
 * character they encode.  No validation is done here; the caller is
 * expected to have checked that both bytes are hex digits.
 */
static char x2c(const char *what)
{
    char value;

#if !APR_CHARSET_EBCDIC
    char hi = what[0];
    char lo = what[1];

    /* "& 0xdf" folds 'a'..'f' onto 'A'..'F' */
    if (hi >= 'A') {
        value = ((hi & 0xdf) - 'A') + 10;
    }
    else {
        value = hi - '0';
    }
    value *= 16;

    if (lo >= 'A') {
        value += ((lo & 0xdf) - 'A') + 10;
    }
    else {
        value += lo - '0';
    }
#else /*APR_CHARSET_EBCDIC*/
    /* Build "0xHH", parse it, then translate the byte via ap_hdrs_from_ascii */
    char hex[5];

    hex[0] = '0';
    hex[1] = 'x';
    hex[2] = what[0];
    hex[3] = what[1];
    hex[4] = '\0';
    value = apr_xlate_conv_byte(ap_hdrs_from_ascii,
                                0xFF & strtol(hex, NULL, 16));
#endif /*APR_CHARSET_EBCDIC*/

    return value;
}
1428
1429 /*
1430  * Unescapes a URL, leaving reserved characters intact.
1431  * Returns 0 on success, non-zero on error
1432  * Failure is due to
1433  *   bad % escape       returns HTTP_BAD_REQUEST
1434  *
1435  *   decoding %00 or a forbidden character returns HTTP_NOT_FOUND
1436  */
1437
1438 static int unescape_url(char *url, const char *forbid, const char *reserved)
1439 {
1440     register int badesc, badpath;
1441     char *x, *y;
1442
1443     badesc = 0;
1444     badpath = 0;
1445     /* Initial scan for first '%'. Don't bother writing values before
1446      * seeing a '%' */
1447     y = strchr(url, '%');
1448     if (y == NULL) {
1449         return OK;
1450     }
1451     for (x = y; *y; ++x, ++y) {
1452         if (*y != '%') {
1453             *x = *y;
1454         }
1455         else {
1456             if (!apr_isxdigit(*(y + 1)) || !apr_isxdigit(*(y + 2))) {
1457                 badesc = 1;
1458                 *x = '%';
1459             }
1460             else {
1461                 char decoded;
1462                 decoded = x2c(y + 1);
1463                 if ((decoded == '\0')
1464                     || (forbid && ap_strchr_c(forbid, decoded))) {
1465                     badpath = 1;
1466                     *x = decoded;
1467                     y += 2;
1468                 }
1469                 else if (reserved && ap_strchr_c(reserved, decoded)) {
1470                     *x++ = *y++;
1471                     *x++ = *y++;
1472                     *x = *y;
1473                 }
1474                 else {
1475                     *x = decoded;
1476                     y += 2;
1477                 }
1478             }
1479         }
1480     }
1481     *x = '\0';
1482     if (badesc) {
1483         return HTTP_BAD_REQUEST;
1484     }
1485     else if (badpath) {
1486         return HTTP_NOT_FOUND;
1487     }
1488     else {
1489         return OK;
1490     }
1491 }
1492 AP_DECLARE(int) ap_unescape_url(char *url)
1493 {
1494     /* Traditional */
1495     return unescape_url(url, SLASHES, NULL);
1496 }
1497 AP_DECLARE(int) ap_unescape_url_keep2f(char *url, int decode_slashes)
1498 {
1499     /* AllowEncodedSlashes (corrected) */
1500     if (decode_slashes) {
1501         /* no chars reserved */
1502         return unescape_url(url, NULL, NULL);
1503     } else {
1504         /* reserve (do not decode) encoded slashes */
1505         return unescape_url(url, NULL, SLASHES);
1506     }
1507 }
#ifdef NEW_APIS
/* Compiled only when NEW_APIS is defined: these stay ifdef'ed out until
 * they've been thought through.  Just a germ of an API extension for now.
 */

/* Decode url in place but keep escapes for '/', ';' and '?' encoded, so
 * that proxied URLs don't get mangled (RFC1738 reserved characters).
 * Where does that leave encoded '&' ?
 */
AP_DECLARE(int) ap_unescape_url_proxy(char *url)
{
    static const char keep_encoded[] = "/;?";

    return unescape_url(url, NULL, keep_encoded);
}

/* Decode url in place, leaving escapes for any character listed in
 * reserved in their encoded form.
 */
AP_DECLARE(int) ap_unescape_url_reserved(char *url, const char *reserved)
{
    int status = unescape_url(url, NULL, reserved);

    return status;
}
#endif
1524
1525 AP_DECLARE(char *) ap_construct_server(apr_pool_t *p, const char *hostname,
1526                                        apr_port_t port, const request_rec *r)
1527 {
1528     if (ap_is_default_port(port, r)) {
1529         return apr_pstrdup(p, hostname);
1530     }
1531     else {
1532         return apr_psprintf(p, "%s:%u", hostname, port);
1533     }
1534 }
1535
1536 AP_DECLARE(int) ap_unescape_all(char *url)
1537 {
1538     return unescape_url(url, NULL, NULL);
1539 }
1540
1541 /* c2x takes an unsigned, and expects the caller has guaranteed that
1542  * 0 <= what < 256... which usually means that you have to cast to
1543  * unsigned char first, because (unsigned)(char)(x) first goes through
1544  * signed extension to an int before the unsigned cast.
1545  *
1546  * The reason for this assumption is to assist gcc code generation --
1547  * the unsigned char -> unsigned extension is already done earlier in
1548  * both uses of this code, so there's no need to waste time doing it
1549  * again.
1550  */
1551 static const char c2x_table[] = "0123456789abcdef";
1552
1553 static APR_INLINE unsigned char *c2x(unsigned what, unsigned char prefix,
1554                                      unsigned char *where)
1555 {
1556 #if APR_CHARSET_EBCDIC
1557     what = apr_xlate_conv_byte(ap_hdrs_to_ascii, (unsigned char)what);
1558 #endif /*APR_CHARSET_EBCDIC*/
1559     *where++ = prefix;
1560     *where++ = c2x_table[what >> 4];
1561     *where++ = c2x_table[what & 0xf];
1562     return where;
1563 }
1564
1565 /*
1566  * escape_path_segment() escapes a path segment, as defined in RFC 1808. This
1567  * routine is (should be) OS independent.
1568  *
1569  * os_escape_path() converts an OS path to a URL, in an OS dependent way. In all
1570  * cases if a ':' occurs before the first '/' in the URL, the URL should be
1571  * prefixed with "./" (or the ':' escaped). In the case of Unix, this means
1572  * leaving '/' alone, but otherwise doing what escape_path_segment() does. For
1573  * efficiency reasons, we don't use escape_path_segment(), which is provided for
1574  * reference. Again, RFC 1808 is where this stuff is defined.
1575  *
1576  * If partial is set, os_escape_path() assumes that the path will be appended to
1577  * something with a '/' in it (and thus does not prefix "./").
1578  */
1579
1580 AP_DECLARE(char *) ap_escape_path_segment_buffer(char *copy, const char *segment)
1581 {
1582     const unsigned char *s = (const unsigned char *)segment;
1583     unsigned char *d = (unsigned char *)copy;
1584     unsigned c;
1585
1586     while ((c = *s)) {
1587         if (TEST_CHAR(c, T_ESCAPE_PATH_SEGMENT)) {
1588             d = c2x(c, '%', d);
1589         }
1590         else {
1591             *d++ = c;
1592         }
1593         ++s;
1594     }
1595     *d = '\0';
1596     return copy;
1597 }
1598
1599 AP_DECLARE(char *) ap_escape_path_segment(apr_pool_t *p, const char *segment)
1600 {
1601     return ap_escape_path_segment_buffer(apr_palloc(p, 3 * strlen(segment) + 1), segment);
1602 }
1603
1604 AP_DECLARE(char *) ap_os_escape_path(apr_pool_t *p, const char *path, int partial)
1605 {
1606     char *copy = apr_palloc(p, 3 * strlen(path) + 3);
1607     const unsigned char *s = (const unsigned char *)path;
1608     unsigned char *d = (unsigned char *)copy;
1609     unsigned c;
1610
1611     if (!partial) {
1612         const char *colon = ap_strchr_c(path, ':');
1613         const char *slash = ap_strchr_c(path, '/');
1614
1615         if (colon && (!slash || colon < slash)) {
1616             *d++ = '.';
1617             *d++ = '/';
1618         }
1619     }
1620     while ((c = *s)) {
1621         if (TEST_CHAR(c, T_OS_ESCAPE_PATH)) {
1622             d = c2x(c, '%', d);
1623         }
1624         else {
1625             *d++ = c;
1626         }
1627         ++s;
1628     }
1629     *d = '\0';
1630     return copy;
1631 }
1632
1633 /* ap_escape_uri is now a macro for os_escape_path */
1634
1635 AP_DECLARE(char *) ap_escape_html2(apr_pool_t *p, const char *s, int toasc)
1636 {
1637     int i, j;
1638     char *x;
1639
1640     /* first, count the number of extra characters */
1641     for (i = 0, j = 0; s[i] != '\0'; i++)
1642         if (s[i] == '<' || s[i] == '>')
1643             j += 3;
1644         else if (s[i] == '&')
1645             j += 4;
1646         else if (s[i] == '"')
1647             j += 5;
1648         else if (toasc && !apr_isascii(s[i]))
1649             j += 5;
1650
1651     if (j == 0)
1652         return apr_pstrmemdup(p, s, i);
1653
1654     x = apr_palloc(p, i + j + 1);
1655     for (i = 0, j = 0; s[i] != '\0'; i++, j++)
1656         if (s[i] == '<') {
1657             memcpy(&x[j], "&lt;", 4);
1658             j += 3;
1659         }
1660         else if (s[i] == '>') {
1661             memcpy(&x[j], "&gt;", 4);
1662             j += 3;
1663         }
1664         else if (s[i] == '&') {
1665             memcpy(&x[j], "&amp;", 5);
1666             j += 4;
1667         }
1668         else if (s[i] == '"') {
1669             memcpy(&x[j], "&quot;", 6);
1670             j += 5;
1671         }
1672         else if (toasc && !apr_isascii(s[i])) {
1673             char *esc = apr_psprintf(p, "&#%3.3d;", (unsigned char)s[i]);
1674             memcpy(&x[j], esc, 6);
1675             j += 5;
1676         }
1677         else
1678             x[j] = s[i];
1679
1680     x[j] = '\0';
1681     return x;
1682 }
1683 AP_DECLARE(char *) ap_escape_logitem(apr_pool_t *p, const char *str)
1684 {
1685     char *ret;
1686     unsigned char *d;
1687     const unsigned char *s;
1688
1689     if (!str) {
1690         return NULL;
1691     }
1692
1693     ret = apr_palloc(p, 4 * strlen(str) + 1); /* Be safe */
1694     d = (unsigned char *)ret;
1695     s = (const unsigned char *)str;
1696     for (; *s; ++s) {
1697
1698         if (TEST_CHAR(*s, T_ESCAPE_LOGITEM)) {
1699             *d++ = '\\';
1700             switch(*s) {
1701             case '\b':
1702                 *d++ = 'b';
1703                 break;
1704             case '\n':
1705                 *d++ = 'n';
1706                 break;
1707             case '\r':
1708                 *d++ = 'r';
1709                 break;
1710             case '\t':
1711                 *d++ = 't';
1712                 break;
1713             case '\v':
1714                 *d++ = 'v';
1715                 break;
1716             case '\\':
1717             case '"':
1718                 *d++ = *s;
1719                 break;
1720             default:
1721                 c2x(*s, 'x', d);
1722                 d += 3;
1723             }
1724         }
1725         else {
1726             *d++ = *s;
1727         }
1728     }
1729     *d = '\0';
1730
1731     return ret;
1732 }
1733
/* Escape source into the fixed-size buffer dest for use in the error log.
 *
 * At most buflen bytes are written, including the terminating NUL; output
 * is truncated at an escape boundary when it does not fit.  Bytes that
 * TEST_CHAR() flags as T_ESCAPE_LOGITEM are escaped as in
 * ap_escape_logitem(), except that a double quote is emitted bare.
 *
 * Returns the number of bytes stored in dest, not counting the NUL.
 * Returns 0 without touching dest when source is NULL or buflen is 0.
 */
AP_DECLARE(apr_size_t) ap_escape_errorlog_item(char *dest, const char *source,
                                               apr_size_t buflen)
{
    unsigned char *d, *ep;
    const unsigned char *s;

    if (!source || !buflen) { /* be safe */
        return 0;
    }

    d = (unsigned char *)dest;
    s = (const unsigned char *)source;
    ep = d + buflen - 1;    /* last usable slot is reserved for the NUL */

    for (; d < ep && *s; ++s) {

        if (TEST_CHAR(*s, T_ESCAPE_LOGITEM)) {
            *d++ = '\\';
            if (d >= ep) {
                /* no room for the rest of the escape: drop the backslash */
                --d;
                break;
            }

            switch(*s) {
            case '\b':
                *d++ = 'b';
                break;
            case '\n':
                *d++ = 'n';
                break;
            case '\r':
                *d++ = 'r';
                break;
            case '\t':
                *d++ = 't';
                break;
            case '\v':
                *d++ = 'v';
                break;
            case '\\':
                *d++ = *s;
                break;
            case '"': /* no need for this in error log */
                d[-1] = *s;     /* overwrite the backslash just written */
                break;
            default:
                /* "xhh" needs three more bytes before ep */
                if (d >= ep - 2) {
                    ep = --d; /* break the for loop as well */
                    break;
                }
                c2x(*s, 'x', d);
                d += 3;
            }
        }
        else {
            *d++ = *s;
        }
    }
    *d = '\0';

    return (d - (unsigned char *)dest);
}
1796
1797 AP_DECLARE(int) ap_is_directory(apr_pool_t *p, const char *path)
1798 {
1799     apr_finfo_t finfo;
1800
1801     if (apr_stat(&finfo, path, APR_FINFO_TYPE, p) != APR_SUCCESS)
1802         return 0;                /* in error condition, just return no */
1803
1804     return (finfo.filetype == APR_DIR);
1805 }
1806
1807 AP_DECLARE(int) ap_is_rdirectory(apr_pool_t *p, const char *path)
1808 {
1809     apr_finfo_t finfo;
1810
1811     if (apr_stat(&finfo, path, APR_FINFO_LINK | APR_FINFO_TYPE, p) != APR_SUCCESS)
1812         return 0;                /* in error condition, just return no */
1813
1814     return (finfo.filetype == APR_DIR);
1815 }
1816
1817 AP_DECLARE(char *) ap_make_full_path(apr_pool_t *a, const char *src1,
1818                                   const char *src2)
1819 {
1820     apr_size_t len1, len2;
1821     char *path;
1822
1823     len1 = strlen(src1);
1824     len2 = strlen(src2);
1825      /* allocate +3 for '/' delimiter, trailing NULL and overallocate
1826       * one extra byte to allow the caller to add a trailing '/'
1827       */
1828     path = (char *)apr_palloc(a, len1 + len2 + 3);
1829     if (len1 == 0) {
1830         *path = '/';
1831         memcpy(path + 1, src2, len2 + 1);
1832     }
1833     else {
1834         char *next;
1835         memcpy(path, src1, len1);
1836         next = path + len1;
1837         if (next[-1] != '/') {
1838             *next++ = '/';
1839         }
1840         memcpy(next, src2, len2 + 1);
1841     }
1842     return path;
1843 }
1844
1845 /*
1846  * Check for an absoluteURI syntax (see section 3.2 in RFC2068).
1847  */
1848 AP_DECLARE(int) ap_is_url(const char *u)
1849 {
1850     register int x;
1851
1852     for (x = 0; u[x] != ':'; x++) {
1853         if ((!u[x]) ||
1854             ((!apr_isalpha(u[x])) && (!apr_isdigit(u[x])) &&
1855              (u[x] != '+') && (u[x] != '-') && (u[x] != '.'))) {
1856             return 0;
1857         }
1858     }
1859
1860     return (x ? 1 : 0);                /* If the first character is ':', it's broken, too */
1861 }
1862
1863 AP_DECLARE(int) ap_ind(const char *s, char c)
1864 {
1865     const char *p = ap_strchr_c(s, c);
1866
1867     if (p == NULL)
1868         return -1;
1869     return p - s;
1870 }
1871
1872 AP_DECLARE(int) ap_rind(const char *s, char c)
1873 {
1874     const char *p = ap_strrchr_c(s, c);
1875
1876     if (p == NULL)
1877         return -1;
1878     return p - s;
1879 }
1880
1881 AP_DECLARE(void) ap_str_tolower(char *str)
1882 {
1883     while (*str) {
1884         *str = apr_tolower(*str);
1885         ++str;
1886     }
1887 }
1888
/*
 * We must return a FQDN
 *
 * Determine a name for the local host, allocated from pool a.
 * Candidates are tried in this order:
 *   1. the name apr_getnameinfo() returns for the local host's address,
 *      if it contains a '.' -- returned immediately, without the alert;
 *   2. the name reported by apr_gethostname(), if it contains a '.';
 *   3. the IP address string from apr_sockaddr_ip_get();
 *   4. the literal "127.0.0.1" when the host name or its address could
 *      not be obtained at all.
 * For every outcome except the first, an alert is logged stating that the
 * fully qualified domain name could not be reliably determined.
 */
char *ap_get_local_host(apr_pool_t *a)
{
#ifndef MAXHOSTNAMELEN
#define MAXHOSTNAMELEN 256
#endif
    char str[MAXHOSTNAMELEN + 1];
    char *server_hostname = NULL;
    apr_sockaddr_t *sockaddr;
    char *hostname;

    if (apr_gethostname(str, sizeof(str) - 1, a) != APR_SUCCESS) {
        ap_log_perror(APLOG_MARK, APLOG_STARTUP | APLOG_WARNING, 0, a,
                     "%s: apr_gethostname() failed to determine ServerName",
                     ap_server_argv0);
    } else {
        /* force termination in the last slot of the buffer */
        str[sizeof(str) - 1] = '\0';
        if (apr_sockaddr_info_get(&sockaddr, str, APR_UNSPEC, 0, 0, a) == APR_SUCCESS) {
            if ( (apr_getnameinfo(&hostname, sockaddr, 0) == APR_SUCCESS) &&
                (ap_strchr_c(hostname, '.')) ) {
                /* looked-up name has a dot: accept it, skipping the alert below */
                server_hostname = apr_pstrdup(a, hostname);
                return server_hostname;
            } else if (ap_strchr_c(str, '.')) {
                /* the configured host name itself has a dot */
                server_hostname = apr_pstrdup(a, str);
            } else {
                /* no dotted name available: fall back to the IP address */
                apr_sockaddr_ip_get(&hostname, sockaddr);
                server_hostname = apr_pstrdup(a, hostname);
            }
        } else {
            ap_log_perror(APLOG_MARK, APLOG_STARTUP | APLOG_WARNING, 0, a,
                         "%s: apr_sockaddr_info_get() failed for %s",
                         ap_server_argv0, str);
        }
    }

    /* nothing usable was found above */
    if (!server_hostname)
        server_hostname = apr_pstrdup(a, "127.0.0.1");

    ap_log_perror(APLOG_MARK, APLOG_ALERT|APLOG_STARTUP, 0, a,
                 "%s: Could not reliably determine the server's fully qualified "
                 "domain name, using %s for ServerName",
                 ap_server_argv0, server_hostname);

    return server_hostname;
}
1936
1937 /* simple 'pool' alloc()ing glue to apr_base64.c
1938  */
1939 AP_DECLARE(char *) ap_pbase64decode(apr_pool_t *p, const char *bufcoded)
1940 {
1941     char *decoded;
1942     int l;
1943
1944     decoded = (char *) apr_palloc(p, 1 + apr_base64_decode_len(bufcoded));
1945     l = apr_base64_decode(decoded, bufcoded);
1946     decoded[l] = '\0'; /* make binary sequence into string */
1947
1948     return decoded;
1949 }
1950
1951 AP_DECLARE(char *) ap_pbase64encode(apr_pool_t *p, char *string)
1952 {
1953     char *encoded;
1954     int l = strlen(string);
1955
1956     encoded = (char *) apr_palloc(p, 1 + apr_base64_encode_len(l));
1957     l = apr_base64_encode(encoded, string, l);
1958     encoded[l] = '\0'; /* make binary sequence into string */
1959
1960     return encoded;
1961 }
1962
1963 /* we want to downcase the type/subtype for comparison purposes
1964  * but nothing else because ;parameter=foo values are case sensitive.
1965  * XXX: in truth we want to downcase parameter names... but really,
1966  * apache has never handled parameters and such correctly.  You
1967  * also need to compress spaces and such to be able to compare
1968  * properly. -djg
1969  */
1970 AP_DECLARE(void) ap_content_type_tolower(char *str)
1971 {
1972     char *semi;
1973
1974     semi = strchr(str, ';');
1975     if (semi) {
1976         *semi = '\0';
1977     }
1978
1979     ap_str_tolower(str);
1980
1981     if (semi) {
1982         *semi = ';';
1983     }
1984 }
1985
1986 /*
1987  * Given a string, replace any bare " with \" .
1988  */
1989 AP_DECLARE(char *) ap_escape_quotes(apr_pool_t *p, const char *instring)
1990 {
1991     int newlen = 0;
1992     const char *inchr = instring;
1993     char *outchr, *outstring;
1994
1995     /*
1996      * Look through the input string, jogging the length of the output
1997      * string up by an extra byte each time we find an unescaped ".
1998      */
1999     while (*inchr != '\0') {
2000         newlen++;
2001         if (*inchr == '"') {
2002             newlen++;
2003         }
2004         /*
2005          * If we find a slosh, and it's not the last byte in the string,
2006          * it's escaping something - advance past both bytes.
2007          */
2008         if ((*inchr == '\\') && (inchr[1] != '\0')) {
2009             inchr++;
2010             newlen++;
2011         }
2012         inchr++;
2013     }
2014     outstring = apr_palloc(p, newlen + 1);
2015     inchr = instring;
2016     outchr = outstring;
2017     /*
2018      * Now copy the input string to the output string, inserting a slosh
2019      * in front of every " that doesn't already have one.
2020      */
2021     while (*inchr != '\0') {
2022         if ((*inchr == '\\') && (inchr[1] != '\0')) {
2023             *outchr++ = *inchr++;
2024             *outchr++ = *inchr++;
2025         }
2026         if (*inchr == '"') {
2027             *outchr++ = '\\';
2028         }
2029         if (*inchr != '\0') {
2030             *outchr++ = *inchr++;
2031         }
2032     }
2033     *outchr = '\0';
2034     return outstring;
2035 }
2036
2037 /*
2038  * Given a string, append the PID deliminated by delim.
2039  * Usually used to create a pid-appended filepath name
2040  * (eg: /a/b/foo -> /a/b/foo.6726). A function, and not
2041  * a macro, to avoid unistd.h dependency
2042  */
2043 AP_DECLARE(char *) ap_append_pid(apr_pool_t *p, const char *string,
2044                                     const char *delim)
2045 {
2046     return apr_psprintf(p, "%s%s%" APR_PID_T_FMT, string,
2047                         delim, getpid());
2048
2049 }
2050
2051 /**
2052  * Parse a given timeout parameter string into an apr_interval_time_t value.
2053  * The unit of the time interval is given as postfix string to the numeric
2054  * string. Currently the following units are understood:
2055  *
2056  * ms    : milliseconds
2057  * s     : seconds
2058  * mi[n] : minutes
2059  * h     : hours
2060  *
2061  * If no unit is contained in the given timeout parameter the default_time_unit
2062  * will be used instead.
2063  * @param timeout_parameter The string containing the timeout parameter.
2064  * @param timeout The timeout value to be returned.
2065  * @param default_time_unit The default time unit to use if none is specified
2066  * in timeout_parameter.
2067  * @return Status value indicating whether the parsing was successful or not.
2068  */
2069 AP_DECLARE(apr_status_t) ap_timeout_parameter_parse(
2070                                                const char *timeout_parameter,
2071                                                apr_interval_time_t *timeout,
2072                                                const char *default_time_unit)
2073 {
2074     char *endp;
2075     const char *time_str;
2076     apr_int64_t tout;
2077
2078     tout = apr_strtoi64(timeout_parameter, &endp, 10);
2079     if (errno) {
2080         return errno;
2081     }
2082     if (!endp || !*endp) {
2083         time_str = default_time_unit;
2084     }
2085     else {
2086         time_str = endp;
2087     }
2088
2089     switch (*time_str) {
2090         /* Time is in seconds */
2091     case 's':
2092         *timeout = (apr_interval_time_t) apr_time_from_sec(tout);
2093         break;
2094     case 'h':
2095         /* Time is in hours */
2096         *timeout = (apr_interval_time_t) apr_time_from_sec(tout * 3600);
2097         break;
2098     case 'm':
2099         switch (*(++time_str)) {
2100         /* Time is in milliseconds */
2101         case 's':
2102             *timeout = (apr_interval_time_t) tout * 1000;
2103             break;
2104         /* Time is in minutes */
2105         case 'i':
2106             *timeout = (apr_interval_time_t) apr_time_from_sec(tout * 60);
2107             break;
2108         default:
2109             return APR_EGENERAL;
2110         }
2111         break;
2112     default:
2113         return APR_EGENERAL;
2114     }
2115     return APR_SUCCESS;
2116 }
2117
2118 /**
2119  * Determine if a request has a request body or not.
2120  *
2121  * @param r the request_rec of the request
2122  * @return truth value
2123  */
2124 AP_DECLARE(int) ap_request_has_body(request_rec *r)
2125 {
2126     apr_off_t cl;
2127     char *estr;
2128     const char *cls;
2129     int has_body;
2130
2131     has_body = (!r->header_only
2132                 && (r->kept_body
2133                     || apr_table_get(r->headers_in, "Transfer-Encoding")
2134                     || ( (cls = apr_table_get(r->headers_in, "Content-Length"))
2135                         && (apr_strtoff(&cl, cls, &estr, 10) == APR_SUCCESS)
2136                         && (!*estr)
2137                         && (cl > 0) )
2138                     )
2139                 );
2140     return has_body;
2141 }
2142
2143 AP_DECLARE_NONSTD(apr_status_t) ap_pool_cleanup_set_null(void *data_)
2144 {
2145     void **ptr = (void **)data_;
2146     *ptr = NULL;
2147     return APR_SUCCESS;
2148 }
2149
2150 AP_DECLARE(apr_status_t) ap_str2_alnum(const char *src, char *dest) {
2151
2152     for ( ; *src; src++, dest++)
2153     {
2154         if (!apr_isprint(*src))
2155             *dest = 'x';
2156         else if (!apr_isalnum(*src))
2157             *dest = '_';
2158         else
2159             *dest = (char)*src;
2160     }
2161     *dest = '\0';
2162     return APR_SUCCESS;
2163
2164 }
2165
2166 AP_DECLARE(apr_status_t) ap_pstr2_alnum(apr_pool_t *p, const char *src,
2167                                         const char **dest)
2168 {
2169     char *new = apr_palloc(p, strlen(src)+1);
2170     if (!new)
2171         return APR_ENOMEM;
2172     *dest = new;
2173     return ap_str2_alnum(src, new);
2174 }
2175
2176 /**
2177  * Read the body and parse any form found, which must be of the
2178  * type application/x-www-form-urlencoded.
2179  *
2180  * Name/value pairs are returned in an array, with the names as
2181  * strings with a maximum length of HUGE_STRING_LEN, and the
2182  * values as bucket brigades. This allows values to be arbitrarily
2183  * large.
2184  *
2185  * All url-encoding is removed from both the names and the values
2186  * on the fly. The names are interpreted as strings, while the
2187  * values are interpreted as blocks of binary data, that may
2188  * contain the 0 character.
2189  *
2190  * In order to ensure that resource limits are not exceeded, a
2191  * maximum size must be provided. If the sum of the lengths of
2192  * the names and the values exceed this size, this function
2193  * will return HTTP_REQUEST_ENTITY_TOO_LARGE.
2194  *
2195  * An optional number of parameters can be provided, if the number
2196  * of parameters provided exceeds this amount, this function will
2197  * return HTTP_REQUEST_ENTITY_TOO_LARGE. If this value is negative,
2198  * no limit is imposed, and the number of parameters is in turn
2199  * constrained by the size parameter above.
2200  *
2201  * This function honours any kept_body configuration, and the
2202  * original raw request body will be saved to the kept_body brigade
2203  * if so configured, just as ap_discard_request_body does.
2204  *
2205  * NOTE: File upload is not yet supported, but can be without change
2206  * to the function call.
2207  */
2208
2209 /* form parsing stuff */
typedef enum {
    FORM_NORMAL   = 0,  /* no %XX escape in progress */
    FORM_AMP      = 1,  /* a '&' was read: finish the current pair */
    FORM_NAME     = 2,  /* collecting the bytes of a name */
    FORM_VALUE    = 3,  /* collecting the bytes of a value */
    FORM_PERCENTA = 4,  /* '%' was read: first hex digit comes next */
    FORM_PERCENTB = 5,  /* first hex digit was read: second comes next */
    FORM_ABORT    = 6   /* a name outgrew the buffer: stop parsing */
} ap_form_type_t;
2219
2220 AP_DECLARE(int) ap_parse_form_data(request_rec *r, ap_filter_t *f,
2221                                    apr_array_header_t **ptr,
2222                                    apr_size_t num, apr_size_t usize)
2223 {
2224     apr_bucket_brigade *bb = NULL;
2225     int seen_eos = 0;
2226     char buffer[HUGE_STRING_LEN + 1];
2227     const char *ct;
2228     apr_size_t offset = 0;
2229     apr_ssize_t size;
2230     ap_form_type_t state = FORM_NAME, percent = FORM_NORMAL;
2231     ap_form_pair_t *pair = NULL;
2232     apr_array_header_t *pairs = apr_array_make(r->pool, 4, sizeof(ap_form_pair_t));
2233
2234     char hi = 0;
2235     char low = 0;
2236
2237     *ptr = pairs;
2238
2239     /* sanity check - we only support forms for now */
2240     ct = apr_table_get(r->headers_in, "Content-Type");
2241     if (!ct || strcmp("application/x-www-form-urlencoded", ct)) {
2242         return ap_discard_request_body(r);
2243     }
2244
2245     if (usize > APR_SIZE_MAX >> 1)
2246         size = APR_SIZE_MAX >> 1;
2247     else
2248         size = usize;
2249
2250     if (!f) {
2251         f = r->input_filters;
2252     }
2253
2254     bb = apr_brigade_create(r->pool, r->connection->bucket_alloc);
2255     do {
2256         apr_bucket *bucket = NULL, *last = NULL;
2257
2258         int rv = ap_get_brigade(f, bb, AP_MODE_READBYTES,
2259                                 APR_BLOCK_READ, HUGE_STRING_LEN);
2260         if (rv != APR_SUCCESS) {
2261             apr_brigade_destroy(bb);
2262             return (rv == AP_FILTER_ERROR) ? rv : HTTP_BAD_REQUEST;
2263         }
2264
2265         for (bucket = APR_BRIGADE_FIRST(bb);
2266              bucket != APR_BRIGADE_SENTINEL(bb);
2267              last = bucket, bucket = APR_BUCKET_NEXT(bucket)) {
2268             const char *data;
2269             apr_size_t len, slide;
2270
2271             if (last) {
2272                 apr_bucket_delete(last);
2273             }
2274             if (APR_BUCKET_IS_EOS(bucket)) {
2275                 seen_eos = 1;
2276                 break;
2277             }
2278             if (bucket->length == 0) {
2279                 continue;
2280             }
2281
2282             rv = apr_bucket_read(bucket, &data, &len, APR_BLOCK_READ);
2283             if (rv != APR_SUCCESS) {
2284                 apr_brigade_destroy(bb);
2285                 return HTTP_BAD_REQUEST;
2286             }
2287
2288             slide = len;
2289             while (state != FORM_ABORT && slide-- > 0 && size >= 0 && num != 0) {
2290                 char c = *data++;
2291                 if ('+' == c) {
2292                     c = ' ';
2293                 }
2294                 else if ('&' == c) {
2295                     state = FORM_AMP;
2296                 }
2297                 if ('%' == c) {
2298                     percent = FORM_PERCENTA;
2299                     continue;
2300                 }
2301                 if (FORM_PERCENTA == percent) {
2302                     if (c >= 'a') {
2303                         hi = c - 'a' + 10;
2304                     }
2305                     else if (c >= 'A') {
2306                         hi = c - 'A' + 10;
2307                     }
2308                     else if (c >= '0') {
2309                         hi = c - '0';
2310                     }
2311                     hi = hi << 4;
2312                     percent = FORM_PERCENTB;
2313                     continue;
2314                 }
2315                 if (FORM_PERCENTB == percent) {
2316                     if (c >= 'a') {
2317                         low = c - 'a' + 10;
2318                     }
2319                     else if (c >= 'A') {
2320                         low = c - 'A' + 10;
2321                     }
2322                     else if (c >= '0') {
2323                         low = c - '0';
2324                     }
2325                     c = low | hi;
2326                     percent = FORM_NORMAL;
2327                 }
2328                 switch (state) {
2329                     case FORM_AMP:
2330                         if (pair) {
2331                             const char *tmp = apr_pmemdup(r->pool, buffer, offset);
2332                             apr_bucket *b = apr_bucket_pool_create(tmp, offset, r->pool, r->connection->bucket_alloc);
2333                             APR_BRIGADE_INSERT_TAIL(pair->value, b);
2334                         }
2335                         state = FORM_NAME;
2336                         pair = NULL;
2337                         offset = 0;
2338                         num--;
2339                         break;
2340                     case FORM_NAME:
2341                         if (offset < HUGE_STRING_LEN) {
2342                             if ('=' == c) {
2343                                 buffer[offset] = 0;
2344                                 offset = 0;
2345                                 pair = (ap_form_pair_t *) apr_array_push(pairs);
2346                                 pair->name = apr_pstrdup(r->pool, buffer);
2347                                 pair->value = apr_brigade_create(r->pool, r->connection->bucket_alloc);
2348                                 state = FORM_VALUE;
2349                             }
2350                             else {
2351                                 buffer[offset++] = c;
2352                                 size--;
2353                             }
2354                         }
2355                         else {
2356                             state = FORM_ABORT;
2357                         }
2358                         break;
2359                     case FORM_VALUE:
2360                         if (offset >= HUGE_STRING_LEN) {
2361                             const char *tmp = apr_pmemdup(r->pool, buffer, offset);
2362                             apr_bucket *b = apr_bucket_pool_create(tmp, offset, r->pool, r->connection->bucket_alloc);
2363                             APR_BRIGADE_INSERT_TAIL(pair->value, b);
2364                             offset = 0;
2365                         }
2366                         buffer[offset++] = c;
2367                         size--;
2368                         break;
2369                     default:
2370                         break;
2371                 }
2372             }
2373
2374         }
2375
2376         apr_brigade_cleanup(bb);
2377     } while (!seen_eos);
2378
2379     if (FORM_ABORT == state || size < 0 || num == 0) {
2380         return HTTP_REQUEST_ENTITY_TOO_LARGE;
2381     }
2382     else if (FORM_VALUE == state && pair && offset > 0) {
2383         const char *tmp = apr_pmemdup(r->pool, buffer, offset);
2384         apr_bucket *b = apr_bucket_pool_create(tmp, offset, r->pool, r->connection->bucket_alloc);
2385         APR_BRIGADE_INSERT_TAIL(pair->value, b);
2386     }
2387
2388     return OK;
2389
2390 }