granicus.if.org Git - apache/blob - server/util.c

   1 /* Licensed to the Apache Software Foundation (ASF) under one or more
   2  * contributor license agreements.  See the NOTICE file distributed with
   3  * this work for additional information regarding copyright ownership.
   4  * The ASF licenses this file to You under the Apache License, Version 2.0
   5  * (the "License"); you may not use this file except in compliance with
   6  * the License.  You may obtain a copy of the License at
   7  *
   8  *     http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 /*
  18  * util.c: string utility things
  19  *
  20  * 3/21/93 Rob McCool
  21  * 1995-96 Many changes by the Apache Software Foundation
  22  *
  23  */
  24
  25 /* Debugging aid:
  26  * #define DEBUG            to trace all cfg_open*()/cfg_closefile() calls
  27  * #define DEBUG_CFG_LINES  to trace every line read from the config files
  28  */
  29
  30 #include "apr.h"
  31 #include "apr_strings.h"
  32 #include "apr_lib.h"
  33
  34 #define APR_WANT_STDIO
  35 #define APR_WANT_STRFUNC
  36 #include "apr_want.h"
  37
  38 #if APR_HAVE_UNISTD_H
  39 #include <unistd.h>
  40 #endif
  41 #if APR_HAVE_PROCESS_H
  42 #include <process.h>            /* for getpid() on Win32 */
  43 #endif
  44 #if APR_HAVE_NETDB_H
  45 #include <netdb.h>              /* for gethostbyname() */
  46 #endif
  47
  48 #include "ap_config.h"
  49 #include "apr_base64.h"
  50 #include "httpd.h"
  51 #include "http_main.h"
  52 #include "http_log.h"
  53 #include "http_protocol.h"
  54 #include "http_config.h"
  55 #include "http_core.h"
  56 #include "util_ebcdic.h"
  57 #include "util_varbuf.h"
  58
  59 #ifdef HAVE_PWD_H
  60 #include <pwd.h>
  61 #endif
  62 #ifdef HAVE_GRP_H
  63 #include <grp.h>
  64 #endif
  65
  66 /* A bunch of functions in util.c scan strings looking for certain characters.
  67  * To make that more efficient we encode a lookup table.  The test_char_table
  68  * is generated automatically by gen_test_char.c.
  69  */
  70 #include "test_char.h"
  71
  72 /* we assume the folks using this ensure 0 <= c < 256... which means
  73  * you need a cast to (unsigned char) first, you can't just plug a
  74  * char in here and get it to work, because if char is signed then it
  75  * will first be sign extended.
  76  */
  77 #define TEST_CHAR(c, f)        (test_char_table[(unsigned)(c)] & (f))
  78
  79 /* Win32/NetWare/OS2 need to check for both forward and back slashes
  80  * in ap_getparents() and ap_escape_url.
  81  */
  82 #ifdef CASE_BLIND_FILESYSTEM
  83 #define IS_SLASH(s) ((s == '/') || (s == '\\'))
  84 #define SLASHES "/\\"
  85 #else
  86 #define IS_SLASH(s) (s == '/')
  87 #define SLASHES "/"
  88 #endif
  89
  90 /* we know core's module_index is 0 */
  91 #undef APLOG_MODULE_INDEX
  92 #define APLOG_MODULE_INDEX AP_CORE_MODULE_INDEX
  93
  94
  95 /*
  96  * Examine a field value (such as a media-/content-type) string and return
  97  * it sans any parameters; e.g., strip off any ';charset=foo' and the like.
  98  */
  99 AP_DECLARE(char *) ap_field_noparam(apr_pool_t *p, const char *intype)
 100 {
 101     const char *semi;
 102
 103     if (intype == NULL) return NULL;
 104
 105     semi = ap_strchr_c(intype, ';');
 106     if (semi == NULL) {
 107         return apr_pstrdup(p, intype);
 108     }
 109     else {
 110         while ((semi > intype) && apr_isspace(semi[-1])) {
 111             semi--;
 112         }
 113         return apr_pstrndup(p, intype, semi - intype);
 114     }
 115 }
 116
 117 AP_DECLARE(char *) ap_ht_time(apr_pool_t *p, apr_time_t t, const char *fmt,
 118                               int gmt)
 119 {
 120     apr_size_t retcode;
 121     char ts[MAX_STRING_LEN];
 122     char tf[MAX_STRING_LEN];
 123     apr_time_exp_t xt;
 124
 125     if (gmt) {
 126         const char *f;
 127         char *strp;
 128
 129         apr_time_exp_gmt(&xt, t);
 130         /* Convert %Z to "GMT" and %z to "+0000";
 131          * on hosts that do not have a time zone string in struct tm,
 132          * strftime must assume its argument is local time.
 133          */
 134         for(strp = tf, f = fmt; strp < tf + sizeof(tf) - 6 && (*strp = *f)
 135             ; f++, strp++) {
 136             if (*f != '%') continue;
 137             switch (f[1]) {
 138             case '%':
 139                 *++strp = *++f;
 140                 break;
 141             case 'Z':
 142                 *strp++ = 'G';
 143                 *strp++ = 'M';
 144                 *strp = 'T';
 145                 f++;
 146                 break;
 147             case 'z': /* common extension */
 148                 *strp++ = '+';
 149                 *strp++ = '0';
 150                 *strp++ = '0';
 151                 *strp++ = '0';
 152                 *strp = '0';
 153                 f++;
 154                 break;
 155             }
 156         }
 157         *strp = '\0';
 158         fmt = tf;
 159     }
 160     else {
 161         apr_time_exp_lt(&xt, t);
 162     }
 163
 164     /* check return code? */
 165     apr_strftime(ts, &retcode, MAX_STRING_LEN, fmt, &xt);
 166     ts[MAX_STRING_LEN - 1] = '\0';
 167     return apr_pstrdup(p, ts);
 168 }
 169
 170 /* Roy owes Rob beer. */
 171 /* Rob owes Roy dinner. */
 172
 173 /* These legacy comments would make a lot more sense if Roy hadn't
 174  * replaced the old later_than() routine with util_date.c.
 175  *
 176  * Well, okay, they still wouldn't make any sense.
 177  */
 178
 179 /* Match = 0, NoMatch = 1, Abort = -1
 180  * Based loosely on sections of wildmat.c by Rich Salz
 181  * Hmmm... shouldn't this really go component by component?
 182  */
 183 AP_DECLARE(int) ap_strcmp_match(const char *str, const char *expected)
 184 {
 185     int x, y;
 186
 187     for (x = 0, y = 0; expected[y]; ++y, ++x) {
 188         if ((!str[x]) && (expected[y] != '*'))
 189             return -1;
 190         if (expected[y] == '*') {
 191             while (expected[++y] == '*');
 192             if (!expected[y])
 193                 return 0;
 194             while (str[x]) {
 195                 int ret;
 196                 if ((ret = ap_strcmp_match(&str[x++], &expected[y])) != 1)
 197                     return ret;
 198             }
 199             return -1;
 200         }
 201         else if ((expected[y] != '?') && (str[x] != expected[y]))
 202             return 1;
 203     }
 204     return (str[x] != '\0');
 205 }
 206
 207 AP_DECLARE(int) ap_strcasecmp_match(const char *str, const char *expected)
 208 {
 209     int x, y;
 210
 211     for (x = 0, y = 0; expected[y]; ++y, ++x) {
 212         if (!str[x] && expected[y] != '*')
 213             return -1;
 214         if (expected[y] == '*') {
 215             while (expected[++y] == '*');
 216             if (!expected[y])
 217                 return 0;
 218             while (str[x]) {
 219                 int ret;
 220                 if ((ret = ap_strcasecmp_match(&str[x++], &expected[y])) != 1)
 221                     return ret;
 222             }
 223             return -1;
 224         }
 225         else if (expected[y] != '?'
 226                  && apr_tolower(str[x]) != apr_tolower(expected[y]))
 227             return 1;
 228     }
 229     return (str[x] != '\0');
 230 }
 231
 232 /* We actually compare the canonical root to this root, (but we don't
 233  * waste time checking the case), since every use of this function in
 234  * httpd-2.1 tests if the path is 'proper', meaning we've already passed
 235  * it through apr_filepath_merge, or we haven't.
 236  */
 237 AP_DECLARE(int) ap_os_is_path_absolute(apr_pool_t *p, const char *dir)
 238 {
 239     const char *newpath;
 240     const char *ourdir = dir;
 241     if (apr_filepath_root(&newpath, &dir, 0, p) != APR_SUCCESS
 242             || strncmp(newpath, ourdir, strlen(newpath)) != 0) {
 243         return 0;
 244     }
 245     return 1;
 246 }
 247
 248 AP_DECLARE(int) ap_is_matchexp(const char *str)
 249 {
 250     register int x;
 251
 252     for (x = 0; str[x]; x++)
 253         if ((str[x] == '*') || (str[x] == '?'))
 254             return 1;
 255     return 0;
 256 }
 257
 258 /*
 259  * Here's a pool-based interface to the POSIX-esque ap_regcomp().
 260  * Note that we return ap_regex_t instead of being passed one.
 261  * The reason is that if you use an already-used ap_regex_t structure,
 262  * the memory that you've already allocated gets forgotten, and
 263  * regfree() doesn't clear it. So we don't allow it.
 264  */
 265
 266 static apr_status_t regex_cleanup(void *preg)
 267 {
 268     ap_regfree((ap_regex_t *) preg);
 269     return APR_SUCCESS;
 270 }
 271
 272 AP_DECLARE(ap_regex_t *) ap_pregcomp(apr_pool_t *p, const char *pattern,
 273                                      int cflags)
 274 {
 275     ap_regex_t *preg = apr_palloc(p, sizeof *preg);
 276     int err = ap_regcomp(preg, pattern, cflags);
 277     if (err) {
 278         if (err == AP_REG_ESPACE)
 279             ap_abort_on_oom();
 280         return NULL;
 281     }
 282
 283     apr_pool_cleanup_register(p, (void *) preg, regex_cleanup,
 284                               apr_pool_cleanup_null);
 285
 286     return preg;
 287 }
 288
 289 AP_DECLARE(void) ap_pregfree(apr_pool_t *p, ap_regex_t *reg)
 290 {
 291     ap_regfree(reg);
 292     apr_pool_cleanup_kill(p, (void *) reg, regex_cleanup);
 293 }
 294
 295 /*
 296  * Similar to standard strstr() but we ignore case in this version.
 297  * Based on the strstr() implementation further below.
 298  */
 299 AP_DECLARE(char *) ap_strcasestr(const char *s1, const char *s2)
 300 {
 301     char *p1, *p2;
 302     if (*s2 == '\0') {
 303         /* an empty s2 */
 304         return((char *)s1);
 305     }
 306     while(1) {
 307         for ( ; (*s1 != '\0') && (apr_tolower(*s1) != apr_tolower(*s2)); s1++);
 308         if (*s1 == '\0') {
 309             return(NULL);
 310         }
 311         /* found first character of s2, see if the rest matches */
 312         p1 = (char *)s1;
 313         p2 = (char *)s2;
 314         for (++p1, ++p2; apr_tolower(*p1) == apr_tolower(*p2); ++p1, ++p2) {
 315             if (*p1 == '\0') {
 316                 /* both strings ended together */
 317                 return((char *)s1);
 318             }
 319         }
 320         if (*p2 == '\0') {
 321             /* second string ended, a match */
 322             break;
 323         }
 324         /* didn't find a match here, try starting at next character in s1 */
 325         s1++;
 326     }
 327     return((char *)s1);
 328 }
 329
 330 /*
 331  * Returns an offsetted pointer in bigstring immediately after
 332  * prefix. Returns bigstring if bigstring doesn't start with
 333  * prefix or if prefix is longer than bigstring while still matching.
 334  * NOTE: pointer returned is relative to bigstring, so we
 335  * can use standard pointer comparisons in the calling function
 336  * (eg: test if ap_stripprefix(a,b) == a)
 337  */
 338 AP_DECLARE(const char *) ap_stripprefix(const char *bigstring,
 339                                         const char *prefix)
 340 {
 341     const char *p1;
 342
 343     if (*prefix == '\0')
 344         return bigstring;
 345
 346     p1 = bigstring;
 347     while (*p1 && *prefix) {
 348         if (*p1++ != *prefix++)
 349             return bigstring;
 350     }
 351     if (*prefix == '\0')
 352         return p1;
 353
 354     /* hit the end of bigstring! */
 355     return bigstring;
 356 }
 357
 358 /* This function substitutes for $0-$9, filling in regular expression
 359  * submatches. Pass it the same nmatch and pmatch arguments that you
 360  * passed ap_regexec(). pmatch should not be greater than the maximum number
 361  * of subexpressions - i.e. one more than the re_nsub member of ap_regex_t.
 362  *
 363  * nmatch must be <=AP_MAX_REG_MATCH (10).
 364  *
 365  * input should be the string with the $-expressions, source should be the
 366  * string that was matched against.
 367  *
 368  * It returns the substituted string, or NULL if a vbuf is used.
 369  * On errors, returns the orig string.
 370  *
 371  * Parts of this code are based on Henry Spencer's regsub(), from his
 372  * AT&T V8 regexp package.
 373  */
 374
 375 static apr_status_t regsub_core(apr_pool_t *p, char **result,
 376                                 struct ap_varbuf *vb, const char *input,
 377                                 const char *source, apr_size_t nmatch,
 378                                 ap_regmatch_t pmatch[], apr_size_t maxlen)
 379 {
 380     const char *src = input;
 381     char *dst;
 382     char c;
 383     apr_size_t no;
 384     apr_size_t len = 0;
 385
 386     AP_DEBUG_ASSERT((result && p && !vb) || (vb && !p && !result));
 387     if (!source || nmatch>AP_MAX_REG_MATCH)
 388         return APR_EINVAL;
 389     if (!nmatch) {
 390         len = strlen(src);
 391         if (maxlen > 0 && len >= maxlen)
 392             return APR_ENOMEM;
 393         if (!vb) {
 394             *result = apr_pstrmemdup(p, src, len);
 395             return APR_SUCCESS;
 396         }
 397         else {
 398             ap_varbuf_strmemcat(vb, src, len);
 399             return APR_SUCCESS;
 400         }
 401     }
 402
 403     /* First pass, find the size */
 404     while ((c = *src++) != '\0') {
 405         if (c == '$' && apr_isdigit(*src))
 406             no = *src++ - '0';
 407         else
 408             no = AP_MAX_REG_MATCH;
 409
 410         if (no >= AP_MAX_REG_MATCH) {  /* Ordinary character. */
 411             if (c == '\\' && *src)
 412                 src++;
 413             len++;
 414         }
 415         else if (no < nmatch && pmatch[no].rm_so < pmatch[no].rm_eo) {
 416             if (APR_SIZE_MAX - len <= pmatch[no].rm_eo - pmatch[no].rm_so)
 417                 return APR_ENOMEM;
 418             len += pmatch[no].rm_eo - pmatch[no].rm_so;
 419         }
 420
 421     }
 422
 423     if (len >= maxlen && maxlen > 0)
 424         return APR_ENOMEM;
 425
 426     if (!vb) {
 427         *result = dst = apr_palloc(p, len + 1);
 428     }
 429     else {
 430         if (vb->strlen == AP_VARBUF_UNKNOWN)
 431             vb->strlen = strlen(vb->buf);
 432         ap_varbuf_grow(vb, vb->strlen + len);
 433         dst = vb->buf + vb->strlen;
 434         vb->strlen += len;
 435     }
 436
 437     /* Now actually fill in the string */
 438
 439     src = input;
 440
 441     while ((c = *src++) != '\0') {
 442         if (c == '$' && apr_isdigit(*src))
 443             no = *src++ - '0';
 444         else
 445             no = AP_MAX_REG_MATCH;
 446
 447         if (no >= AP_MAX_REG_MATCH) {  /* Ordinary character. */
 448             if (c == '\\' && *src)
 449                 c = *src++;
 450             *dst++ = c;
 451         }
 452         else if (no < nmatch && pmatch[no].rm_so < pmatch[no].rm_eo) {
 453             len = pmatch[no].rm_eo - pmatch[no].rm_so;
 454             memcpy(dst, source + pmatch[no].rm_so, len);
 455             dst += len;
 456         }
 457
 458     }
 459     *dst = '\0';
 460
 461     return APR_SUCCESS;
 462 }
 463
 464 #ifndef AP_PREGSUB_MAXLEN
 465 #define AP_PREGSUB_MAXLEN   (HUGE_STRING_LEN * 8)
 466 #endif
 467 AP_DECLARE(char *) ap_pregsub(apr_pool_t *p, const char *input,
 468                               const char *source, apr_size_t nmatch,
 469                               ap_regmatch_t pmatch[])
 470 {
 471     char *result;
 472     apr_status_t rc = regsub_core(p, &result, NULL, input, source, nmatch,
 473                                   pmatch, AP_PREGSUB_MAXLEN);
 474     if (rc != APR_SUCCESS)
 475         result = NULL;
 476     return result;
 477 }
 478
 479 AP_DECLARE(apr_status_t) ap_pregsub_ex(apr_pool_t *p, char **result,
 480                                        const char *input, const char *source,
 481                                        apr_size_t nmatch, ap_regmatch_t pmatch[],
 482                                        apr_size_t maxlen)
 483 {
 484     apr_status_t rc = regsub_core(p, result, NULL, input, source, nmatch,
 485                                   pmatch, maxlen);
 486     if (rc != APR_SUCCESS)
 487         *result = NULL;
 488     return rc;
 489 }
 490
 491 /*
 492  * Parse .. so we don't compromise security
 493  */
 494 AP_DECLARE(void) ap_getparents(char *name)
 495 {
 496     char *next;
 497     int l, w, first_dot;
 498
 499     /* Four paseses, as per RFC 1808 */
 500     /* a) remove ./ path segments */
 501     for (next = name; *next && (*next != '.'); next++) {
 502     }
 503
 504     l = w = first_dot = next - name;
 505     while (name[l] != '\0') {
 506         if (name[l] == '.' && IS_SLASH(name[l + 1])
 507             && (l == 0 || IS_SLASH(name[l - 1])))
 508             l += 2;
 509         else
 510             name[w++] = name[l++];
 511     }
 512
 513     /* b) remove trailing . path, segment */
 514     if (w == 1 && name[0] == '.')
 515         w--;
 516     else if (w > 1 && name[w - 1] == '.' && IS_SLASH(name[w - 2]))
 517         w--;
 518     name[w] = '\0';
 519
 520     /* c) remove all xx/../ segments. (including leading ../ and /../) */
 521     l = first_dot;
 522
 523     while (name[l] != '\0') {
 524         if (name[l] == '.' && name[l + 1] == '.' && IS_SLASH(name[l + 2])
 525             && (l == 0 || IS_SLASH(name[l - 1]))) {
 526             register int m = l + 3, n;
 527
 528             l = l - 2;
 529             if (l >= 0) {
 530                 while (l >= 0 && !IS_SLASH(name[l]))
 531                     l--;
 532                 l++;
 533             }
 534             else
 535                 l = 0;
 536             n = l;
 537             while ((name[n] = name[m]))
 538                 (++n, ++m);
 539         }
 540         else
 541             ++l;
 542     }
 543
 544     /* d) remove trailing xx/.. segment. */
 545     if (l == 2 && name[0] == '.' && name[1] == '.')
 546         name[0] = '\0';
 547     else if (l > 2 && name[l - 1] == '.' && name[l - 2] == '.'
 548              && IS_SLASH(name[l - 3])) {
 549         l = l - 4;
 550         if (l >= 0) {
 551             while (l >= 0 && !IS_SLASH(name[l]))
 552                 l--;
 553             l++;
 554         }
 555         else
 556             l = 0;
 557         name[l] = '\0';
 558     }
 559 }
 560
 561 AP_DECLARE(void) ap_no2slash(char *name)
 562 {
 563     char *d, *s;
 564
 565     s = d = name;
 566
 567 #ifdef HAVE_UNC_PATHS
 568     /* Check for UNC names.  Leave leading two slashes. */
 569     if (s[0] == '/' && s[1] == '/')
 570         *d++ = *s++;
 571 #endif
 572
 573     while (*s) {
 574         if ((*d++ = *s) == '/') {
 575             do {
 576                 ++s;
 577             } while (*s == '/');
 578         }
 579         else {
 580             ++s;
 581         }
 582     }
 583     *d = '\0';
 584 }
 585
 586
 587 /*
 588  * copy at most n leading directories of s into d
 589  * d should be at least as large as s plus 1 extra byte
 590  * assumes n > 0
 591  * the return value is the ever useful pointer to the trailing \0 of d
 592  *
 593  * MODIFIED FOR HAVE_DRIVE_LETTERS and NETWARE environments,
 594  * so that if n == 0, "/" is returned in d with n == 1
 595  * and s == "e:/test.html", "e:/" is returned in d
 596  * *** See also directory_walk in modules/http/http_request.c
 597
 598  * examples:
 599  *    /a/b, 0  ==> /  (true for all platforms)
 600  *    /a/b, 1  ==> /
 601  *    /a/b, 2  ==> /a/
 602  *    /a/b, 3  ==> /a/b/
 603  *    /a/b, 4  ==> /a/b/
 604  *
 605  *    c:/a/b 0 ==> /
 606  *    c:/a/b 1 ==> c:/
 607  *    c:/a/b 2 ==> c:/a/
 608  *    c:/a/b 3 ==> c:/a/b
 609  *    c:/a/b 4 ==> c:/a/b
 610  */
 611 AP_DECLARE(char *) ap_make_dirstr_prefix(char *d, const char *s, int n)
 612 {
 613     if (n < 1) {
 614         *d = '/';
 615         *++d = '\0';
 616         return (d);
 617     }
 618
 619     for (;;) {
 620         if (*s == '\0' || (*s == '/' && (--n) == 0)) {
 621             *d = '/';
 622             break;
 623         }
 624         *d++ = *s++;
 625     }
 626     *++d = 0;
 627     return (d);
 628 }
 629
 630
 631 /*
 632  * return the parent directory name including trailing / of the file s
 633  */
 634 AP_DECLARE(char *) ap_make_dirstr_parent(apr_pool_t *p, const char *s)
 635 {
 636     const char *last_slash = ap_strrchr_c(s, '/');
 637     char *d;
 638     int l;
 639
 640     if (last_slash == NULL) {
 641         return apr_pstrdup(p, "");
 642     }
 643     l = (last_slash - s) + 1;
 644     d = apr_pstrmemdup(p, s, l);
 645
 646     return (d);
 647 }
 648
 649
 650 AP_DECLARE(int) ap_count_dirs(const char *path)
 651 {
 652     register int x, n;
 653
 654     for (x = 0, n = 0; path[x]; x++)
 655         if (path[x] == '/')
 656             n++;
 657     return n;
 658 }
 659
 660 AP_DECLARE(char *) ap_getword_nc(apr_pool_t *atrans, char **line, char stop)
 661 {
 662     return ap_getword(atrans, (const char **) line, stop);
 663 }
 664
 665 AP_DECLARE(char *) ap_getword(apr_pool_t *atrans, const char **line, char stop)
 666 {
 667     const char *pos = *line;
 668     int len;
 669     char *res;
 670
 671     while ((*pos != stop) && *pos) {
 672         ++pos;
 673     }
 674
 675     len = pos - *line;
 676     res = apr_pstrmemdup(atrans, *line, len);
 677
 678     if (stop) {
 679         while (*pos == stop) {
 680             ++pos;
 681         }
 682     }
 683     *line = pos;
 684
 685     return res;
 686 }
 687
 688 AP_DECLARE(char *) ap_getword_white_nc(apr_pool_t *atrans, char **line)
 689 {
 690     return ap_getword_white(atrans, (const char **) line);
 691 }
 692
 693 AP_DECLARE(char *) ap_getword_white(apr_pool_t *atrans, const char **line)
 694 {
 695     const char *pos = *line;
 696     int len;
 697     char *res;
 698
 699     while (!apr_isspace(*pos) && *pos) {
 700         ++pos;
 701     }
 702
 703     len = pos - *line;
 704     res = apr_pstrmemdup(atrans, *line, len);
 705
 706     while (apr_isspace(*pos)) {
 707         ++pos;
 708     }
 709
 710     *line = pos;
 711
 712     return res;
 713 }
 714
 715 AP_DECLARE(char *) ap_getword_nulls_nc(apr_pool_t *atrans, char **line,
 716                                        char stop)
 717 {
 718     return ap_getword_nulls(atrans, (const char **) line, stop);
 719 }
 720
 721 AP_DECLARE(char *) ap_getword_nulls(apr_pool_t *atrans, const char **line,
 722                                     char stop)
 723 {
 724     const char *pos = ap_strchr_c(*line, stop);
 725     char *res;
 726
 727     if (!pos) {
 728         apr_size_t len = strlen(*line);
 729         res = apr_pstrmemdup(atrans, *line, len);
 730         *line += len;
 731         return res;
 732     }
 733
 734     res = apr_pstrndup(atrans, *line, pos - *line);
 735
 736     ++pos;
 737
 738     *line = pos;
 739
 740     return res;
 741 }
 742
 743 /* Get a word, (new) config-file style --- quoted strings and backslashes
 744  * all honored
 745  */
 746
 747 static char *substring_conf(apr_pool_t *p, const char *start, int len,
 748                             char quote)
 749 {
 750     char *result = apr_palloc(p, len + 2);
 751     char *resp = result;
 752     int i;
 753
 754     for (i = 0; i < len; ++i) {
 755         if (start[i] == '\\' && (start[i + 1] == '\\'
 756                                  || (quote && start[i + 1] == quote)))
 757             *resp++ = start[++i];
 758         else
 759             *resp++ = start[i];
 760     }
 761
 762     *resp++ = '\0';
 763 #if RESOLVE_ENV_PER_TOKEN
 764     return (char *)ap_resolve_env(p,result);
 765 #else
 766     return result;
 767 #endif
 768 }
 769
 770 AP_DECLARE(char *) ap_getword_conf_nc(apr_pool_t *p, char **line)
 771 {
 772     return ap_getword_conf(p, (const char **) line);
 773 }
 774
 775 AP_DECLARE(char *) ap_getword_conf(apr_pool_t *p, const char **line)
 776 {
 777     const char *str = *line, *strend;
 778     char *res;
 779     char quote;
 780
 781     while (*str && apr_isspace(*str))
 782         ++str;
 783
 784     if (!*str) {
 785         *line = str;
 786         return "";
 787     }
 788
 789     if ((quote = *str) == '"' || quote == '\'') {
 790         strend = str + 1;
 791         while (*strend && *strend != quote) {
 792             if (*strend == '\\' && strend[1] &&
 793                 (strend[1] == quote || strend[1] == '\\')) {
 794                 strend += 2;
 795             }
 796             else {
 797                 ++strend;
 798             }
 799         }
 800         res = substring_conf(p, str + 1, strend - str - 1, quote);
 801
 802         if (*strend == quote)
 803             ++strend;
 804     }
 805     else {
 806         strend = str;
 807         while (*strend && !apr_isspace(*strend))
 808             ++strend;
 809
 810         res = substring_conf(p, str, strend - str, 0);
 811     }
 812
 813     while (*strend && apr_isspace(*strend))
 814         ++strend;
 815     *line = strend;
 816     return res;
 817 }
 818
 819 AP_DECLARE(int) ap_cfg_closefile(ap_configfile_t *cfp)
 820 {
 821 #ifdef DEBUG
 822     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL, APLOGNO(00551)
 823         "Done with config file %s", cfp->name);
 824 #endif
 825     return (cfp->close == NULL) ? 0 : cfp->close(cfp->param);
 826 }
 827
 828 /* we can't use apr_file_* directly because of linking issues on Windows */
 829 static apr_status_t cfg_close(void *param)
 830 {
 831     return apr_file_close(param);
 832 }
 833
 834 static apr_status_t cfg_getch(char *ch, void *param)
 835 {
 836     return apr_file_getc(ch, param);
 837 }
 838
 839 static apr_status_t cfg_getstr(void *buf, apr_size_t bufsiz, void *param)
 840 {
 841     return apr_file_gets(buf, bufsiz, param);
 842 }
 843
 844 /* Open a ap_configfile_t as FILE, return open ap_configfile_t struct pointer */
 845 AP_DECLARE(apr_status_t) ap_pcfg_openfile(ap_configfile_t **ret_cfg,
 846                                           apr_pool_t *p, const char *name)
 847 {
 848     ap_configfile_t *new_cfg;
 849     apr_file_t *file = NULL;
 850     apr_finfo_t finfo;
 851     apr_status_t status;
 852 #ifdef DEBUG
 853     char buf[120];
 854 #endif
 855
 856     if (name == NULL) {
 857         ap_log_error(APLOG_MARK, APLOG_ERR, 0, NULL, APLOGNO(00552)
 858                "Internal error: pcfg_openfile() called with NULL filename");
 859         return APR_EBADF;
 860     }
 861
 862     status = apr_file_open(&file, name, APR_READ | APR_BUFFERED,
 863                            APR_OS_DEFAULT, p);
 864 #ifdef DEBUG
 865     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL, APLOGNO(00553)
 866                 "Opening config file %s (%s)",
 867                 name, (status != APR_SUCCESS) ?
 868                 apr_strerror(status, buf, sizeof(buf)) : "successful");
 869 #endif
 870     if (status != APR_SUCCESS)
 871         return status;
 872
 873     status = apr_file_info_get(&finfo, APR_FINFO_TYPE, file);
 874     if (status != APR_SUCCESS)
 875         return status;
 876
 877     if (finfo.filetype != APR_REG &&
 878 #if defined(WIN32) || defined(OS2) || defined(NETWARE)
 879         strcasecmp(apr_filepath_name_get(name), "nul") != 0) {
 880 #else
 881         strcmp(name, "/dev/null") != 0) {
 882 #endif /* WIN32 || OS2 */
 883         ap_log_error(APLOG_MARK, APLOG_ERR, 0, NULL, APLOGNO(00554)
 884                      "Access to file %s denied by server: not a regular file",
 885                      name);
 886         apr_file_close(file);
 887         return APR_EBADF;
 888     }
 889
 890 #ifdef WIN32
 891     /* Some twisted character [no pun intended] at MS decided that a
 892      * zero width joiner as the lead wide character would be ideal for
 893      * describing Unicode text files.  This was further convoluted to
 894      * another MSism that the same character mapped into utf-8, EF BB BF
 895      * would signify utf-8 text files.
 896      *
 897      * Since MS configuration files are all protecting utf-8 encoded
 898      * Unicode path, file and resource names, we already have the correct
 899      * WinNT encoding.  But at least eat the stupid three bytes up front.
 900      */
 901     {
 902         unsigned char buf[4];
 903         apr_size_t len = 3;
 904         status = apr_file_read(file, buf, &len);
 905         if ((status != APR_SUCCESS) || (len < 3)
 906               || memcmp(buf, "\xEF\xBB\xBF", 3) != 0) {
 907             apr_off_t zero = 0;
 908             apr_file_seek(file, APR_SET, &zero);
 909         }
 910     }
 911 #endif
 912
 913     new_cfg = apr_palloc(p, sizeof(*new_cfg));
 914     new_cfg->param = file;
 915     new_cfg->name = apr_pstrdup(p, name);
 916     new_cfg->getch = cfg_getch;
 917     new_cfg->getstr = cfg_getstr;
 918     new_cfg->close = cfg_close;
 919     new_cfg->line_number = 0;
 920     *ret_cfg = new_cfg;
 921     return APR_SUCCESS;
 922 }
 923
 924
 925 /* Allocate a ap_configfile_t handle with user defined functions and params */
 926 AP_DECLARE(ap_configfile_t *) ap_pcfg_open_custom(
 927             apr_pool_t *p, const char *descr, void *param,
 928             apr_status_t (*getc_func) (char *ch, void *param),
 929             apr_status_t (*gets_func) (void *buf, apr_size_t bufsize, void *param),
 930             apr_status_t (*close_func) (void *param))
 931 {
 932     ap_configfile_t *new_cfg = apr_palloc(p, sizeof(*new_cfg));
 933     new_cfg->param = param;
 934     new_cfg->name = descr;
 935     new_cfg->getch = getc_func;
 936     new_cfg->getstr = gets_func;
 937     new_cfg->close = close_func;
 938     new_cfg->line_number = 0;
 939     return new_cfg;
 940 }
 941
 942 /* Read one character from a configfile_t */
 943 AP_DECLARE(apr_status_t) ap_cfg_getc(char *ch, ap_configfile_t *cfp)
 944 {
 945     apr_status_t rc = cfp->getch(ch, cfp->param);
 946     if (rc == APR_SUCCESS && *ch == LF)
 947         ++cfp->line_number;
 948     return rc;
 949 }
 950
 951 AP_DECLARE(const char *) ap_pcfg_strerror(apr_pool_t *p, ap_configfile_t *cfp,
 952                                           apr_status_t rc)
 953 {
 954     char buf[MAX_STRING_LEN];
 955     if (rc == APR_SUCCESS)
 956         return NULL;
 957     return apr_psprintf(p, "Error reading %s at line %d: %s",
 958                         cfp->name, cfp->line_number,
 959                         rc == APR_ENOSPC ? "Line too long"
 960                                          : apr_strerror(rc, buf, sizeof(buf)));
 961 }
 962
 963 /* Read one line from open ap_configfile_t, strip LF, increase line number */
 964 /* If custom handler does not define a getstr() function, read char by char */
 965 static apr_status_t ap_cfg_getline_core(char *buf, apr_size_t bufsize,
 966                                         ap_configfile_t *cfp)
 967 {
 968     apr_status_t rc;
 969     /* If a "get string" function is defined, use it */
 970     if (cfp->getstr != NULL) {
 971         char *cp;
 972         char *cbuf = buf;
 973         apr_size_t cbufsize = bufsize;
 974
 975         while (1) {
 976             ++cfp->line_number;
 977             rc = cfp->getstr(cbuf, cbufsize, cfp->param);
 978             if (rc == APR_EOF) {
 979                 if (cbuf != buf) {
 980                     *cbuf = '\0';
 981                     break;
 982                 }
 983                 else {
 984                     return APR_EOF;
 985                 }
 986             }
 987             if (rc != APR_SUCCESS) {
 988                 return rc;
 989             }
 990
 991             /*
 992              *  check for line continuation,
 993              *  i.e. match [^\\]\\[\r]\n only
 994              */
 995             cp = cbuf;
 996             cp += strlen(cp);
 997             if (cp > cbuf && cp[-1] == LF) {
 998                 cp--;
 999                 if (cp > cbuf && cp[-1] == CR)
1000                     cp--;
1001                 if (cp > cbuf && cp[-1] == '\\') {
1002                     cp--;
1003                     /*
1004                      * line continuation requested -
1005                      * then remove backslash and continue
1006                      */
1007                     cbufsize -= (cp-cbuf);
1008                     cbuf = cp;
1009                     continue;
1010                 }
1011             }
1012             else if (cp - buf >= bufsize - 1) {
1013                 return APR_ENOSPC;
1014             }
1015             break;
1016         }
1017     } else {
1018         /* No "get string" function defined; read character by character */
1019         apr_size_t i = 0;
1020
1021         if (bufsize < 2) {
1022             /* too small, assume caller is crazy */
1023             return APR_EINVAL;
1024         }
1025         buf[0] = '\0';
1026
1027         while (1) {
1028             char c;
1029             rc = cfp->getch(&c, cfp->param);
1030             if (rc == APR_EOF) {
1031                 if (i > 0)
1032                     break;
1033                 else
1034                     return APR_EOF;
1035             }
1036             if (rc != APR_SUCCESS)
1037                 return rc;
1038             if (c == LF) {
1039                 ++cfp->line_number;
1040                 /* check for line continuation */
1041                 if (i > 0 && buf[i-1] == '\\') {
1042                     i--;
1043                     continue;
1044                 }
1045                 else {
1046                     break;
1047                 }
1048             }
1049             else if (i >= bufsize - 2) {
1050                 return APR_ENOSPC;
1051             }
1052             buf[i] = c;
1053             ++i;
1054         }
1055         buf[i] = '\0';
1056     }
1057     return APR_SUCCESS;
1058 }
1059
1060 static int cfg_trim_line(char *buf)
1061 {
1062     char *start, *end;
1063     /*
1064      * Leading and trailing white space is eliminated completely
1065      */
1066     start = buf;
1067     while (apr_isspace(*start))
1068         ++start;
1069     /* blast trailing whitespace */
1070     end = &start[strlen(start)];
1071     while (--end >= start && apr_isspace(*end))
1072         *end = '\0';
1073     /* Zap leading whitespace by shifting */
1074     if (start != buf)
1075         memmove(buf, start, end - start + 2);
1076 #ifdef DEBUG_CFG_LINES
1077     ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, NULL, APLOGNO(00555) "Read config: '%s'", buf);
1078 #endif
1079     return end - start + 1;
1080 }
1081
1082 /* Read one line from open ap_configfile_t, strip LF, increase line number */
1083 /* If custom handler does not define a getstr() function, read char by char */
1084 AP_DECLARE(apr_status_t) ap_cfg_getline(char *buf, apr_size_t bufsize,
1085                                         ap_configfile_t *cfp)
1086 {
1087     apr_status_t rc = ap_cfg_getline_core(buf, bufsize, cfp);
1088     if (rc == APR_SUCCESS)
1089         cfg_trim_line(buf);
1090     return rc;
1091 }
1092
1093 AP_DECLARE(apr_status_t) ap_varbuf_cfg_getline(struct ap_varbuf *vb,
1094                                                ap_configfile_t *cfp,
1095                                                apr_size_t max_len)
1096 {
1097     apr_status_t rc;
1098     apr_size_t new_len;
1099     vb->strlen = 0;
1100     *vb->buf = '\0';
1101
1102     if (vb->strlen == AP_VARBUF_UNKNOWN)
1103         vb->strlen = strlen(vb->buf);
1104     if (vb->avail - vb->strlen < 3) {
1105         new_len = vb->avail * 2;
1106         if (new_len > max_len)
1107             new_len = max_len;
1108         else if (new_len < 3)
1109             new_len = 3;
1110         ap_varbuf_grow(vb, new_len);
1111     }
1112
1113     for (;;) {
1114         rc = ap_cfg_getline_core(vb->buf + vb->strlen, vb->avail - vb->strlen, cfp);
1115         if (rc == APR_ENOSPC || rc == APR_SUCCESS)
1116             vb->strlen += strlen(vb->buf + vb->strlen);
1117         if (rc != APR_ENOSPC)
1118             break;
1119         if (vb->avail >= max_len)
1120             return APR_ENOSPC;
1121         new_len = vb->avail * 2;
1122         if (new_len > max_len)
1123             new_len = max_len;
1124         ap_varbuf_grow(vb, new_len);
1125         --cfp->line_number;
1126     }
1127     if (vb->strlen > max_len)
1128         return APR_ENOSPC;
1129     if (rc == APR_SUCCESS)
1130         vb->strlen = cfg_trim_line(vb->buf);
1131     return rc;
1132 }
1133
1134 /* Size an HTTP header field list item, as separated by a comma.
1135  * The return value is a pointer to the beginning of the non-empty list item
1136  * within the original string (or NULL if there is none) and the address
1137  * of field is shifted to the next non-comma, non-whitespace character.
1138  * len is the length of the item excluding any beginning whitespace.
1139  */
1140 AP_DECLARE(const char *) ap_size_list_item(const char **field, int *len)
1141 {
1142     const unsigned char *ptr = (const unsigned char *)*field;
1143     const unsigned char *token;
1144     int in_qpair, in_qstr, in_com;
1145
1146     /* Find first non-comma, non-whitespace byte */
1147
1148     while (*ptr == ',' || apr_isspace(*ptr))
1149         ++ptr;
1150
1151     token = ptr;
1152
1153     /* Find the end of this item, skipping over dead bits */
1154
1155     for (in_qpair = in_qstr = in_com = 0;
1156          *ptr && (in_qpair || in_qstr || in_com || *ptr != ',');
1157          ++ptr) {
1158
1159         if (in_qpair) {
1160             in_qpair = 0;
1161         }
1162         else {
1163             switch (*ptr) {
1164                 case '\\': in_qpair = 1;      /* quoted-pair         */
1165                            break;
1166                 case '"' : if (!in_com)       /* quoted string delim */
1167                                in_qstr = !in_qstr;
1168                            break;
1169                 case '(' : if (!in_qstr)      /* comment (may nest)  */
1170                                ++in_com;
1171                            break;
1172                 case ')' : if (in_com)        /* end comment         */
1173                                --in_com;
1174                            break;
1175                 default  : break;
1176             }
1177         }
1178     }
1179
1180     if ((*len = (ptr - token)) == 0) {
1181         *field = (const char *)ptr;
1182         return NULL;
1183     }
1184
1185     /* Advance field pointer to the next non-comma, non-white byte */
1186
1187     while (*ptr == ',' || apr_isspace(*ptr))
1188         ++ptr;
1189
1190     *field = (const char *)ptr;
1191     return (const char *)token;
1192 }
1193
/* Retrieve an HTTP header field list item, as separated by a comma,
 * while stripping insignificant whitespace and lowercasing anything not in
 * a quoted string or comment.  The return value is a new string containing
 * the converted list item (or NULL if none) and the address pointed to by
 * field is shifted to the next non-comma, non-whitespace.
 *
 * p     - pool the returned string is allocated from
 * field - in/out scan position, advanced by ap_size_list_item()
 *
 * addspace is a three-state flag:
 *    1  white space was seen and a single blank is owed before the next
 *       ordinary byte
 *    0  nothing pending
 *   -1  a '=', '/' or ';' was just copied outside quotes/comments, so
 *       following white space is dropped instead of being turned into a
 *       blank
 */
AP_DECLARE(char *) ap_get_list_item(apr_pool_t *p, const char **field)
{
    const char *tok_start;
    const unsigned char *ptr;
    unsigned char *pos;
    char *token;
    int addspace = 0, in_qpair = 0, in_qstr = 0, in_com = 0, tok_len = 0;

    /* Find the beginning and maximum length of the list item so that
     * we can allocate a buffer for the new string and reset the field.
     */
    if ((tok_start = ap_size_list_item(field, &tok_len)) == NULL) {
        return NULL;
    }
    /* tok_len bytes for the item plus one for the terminating NUL */
    token = apr_palloc(p, tok_len + 1);

    /* Scan the token again, but this time copy only the good bytes.
     * We skip extra whitespace and any whitespace around a '=', '/',
     * or ';' and lowercase normal characters not within a comment,
     * quoted-string or quoted-pair.
     */
    for (ptr = (const unsigned char *)tok_start, pos = (unsigned char *)token;
         *ptr && (in_qpair || in_qstr || in_com || *ptr != ',');
         ++ptr) {

        if (in_qpair) {
            /* byte following a backslash is copied verbatim */
            in_qpair = 0;
            *pos++ = *ptr;
        }
        else {
            switch (*ptr) {
                case '\\': in_qpair = 1;
                           if (addspace == 1)
                               *pos++ = ' ';
                           *pos++ = *ptr;
                           addspace = 0;
                           break;
                case '"' : if (!in_com)
                               in_qstr = !in_qstr;
                           if (addspace == 1)
                               *pos++ = ' ';
                           *pos++ = *ptr;
                           addspace = 0;
                           break;
                case '(' : if (!in_qstr)
                               ++in_com;
                           if (addspace == 1)
                               *pos++ = ' ';
                           *pos++ = *ptr;
                           addspace = 0;
                           break;
                case ')' : if (in_com)
                               --in_com;
                           /* pending blank before ')' is dropped */
                           *pos++ = *ptr;
                           addspace = 0;
                           break;
                case ' ' :
                case '\t': if (addspace)
                               break;  /* run of blanks, or after =,/,; */
                           if (in_com || in_qstr)
                               *pos++ = *ptr;
                           else
                               addspace = 1;
                           break;
                case '=' :
                case '/' :
                case ';' : if (!(in_com || in_qstr))
                               addspace = -1;
                           *pos++ = *ptr;
                           break;
                default  : if (addspace == 1)
                               *pos++ = ' ';
                           *pos++ = (in_com || in_qstr) ? *ptr
                                                        : apr_tolower(*ptr);
                           addspace = 0;
                           break;
            }
        }
    }
    *pos = '\0';

    return token;
}
1283
1284 /* Find an item in canonical form (lowercase, no extra spaces) within
1285  * an HTTP field value list.  Returns 1 if found, 0 if not found.
1286  * This would be much more efficient if we stored header fields as
1287  * an array of list items as they are received instead of a plain string.
1288  */
1289 AP_DECLARE(int) ap_find_list_item(apr_pool_t *p, const char *line,
1290                                   const char *tok)
1291 {
1292     const unsigned char *pos;
1293     const unsigned char *ptr = (const unsigned char *)line;
1294     int good = 0, addspace = 0, in_qpair = 0, in_qstr = 0, in_com = 0;
1295
1296     if (!line || !tok)
1297         return 0;
1298
1299     do {  /* loop for each item in line's list */
1300
1301         /* Find first non-comma, non-whitespace byte */
1302
1303         while (*ptr == ',' || apr_isspace(*ptr))
1304             ++ptr;
1305
1306         if (*ptr)
1307             good = 1;  /* until proven otherwise for this item */
1308         else
1309             break;     /* no items left and nothing good found */
1310
1311         /* We skip extra whitespace and any whitespace around a '=', '/',
1312          * or ';' and lowercase normal characters not within a comment,
1313          * quoted-string or quoted-pair.
1314          */
1315         for (pos = (const unsigned char *)tok;
1316              *ptr && (in_qpair || in_qstr || in_com || *ptr != ',');
1317              ++ptr) {
1318
1319             if (in_qpair) {
1320                 in_qpair = 0;
1321                 if (good)
1322                     good = (*pos++ == *ptr);
1323             }
1324             else {
1325                 switch (*ptr) {
1326                     case '\\': in_qpair = 1;
1327                                if (addspace == 1)
1328                                    good = good && (*pos++ == ' ');
1329                                good = good && (*pos++ == *ptr);
1330                                addspace = 0;
1331                                break;
1332                     case '"' : if (!in_com)
1333                                    in_qstr = !in_qstr;
1334                                if (addspace == 1)
1335                                    good = good && (*pos++ == ' ');
1336                                good = good && (*pos++ == *ptr);
1337                                addspace = 0;
1338                                break;
1339                     case '(' : if (!in_qstr)
1340                                    ++in_com;
1341                                if (addspace == 1)
1342                                    good = good && (*pos++ == ' ');
1343                                good = good && (*pos++ == *ptr);
1344                                addspace = 0;
1345                                break;
1346                     case ')' : if (in_com)
1347                                    --in_com;
1348                                good = good && (*pos++ == *ptr);
1349                                addspace = 0;
1350                                break;
1351                     case ' ' :
1352                     case '\t': if (addspace || !good)
1353                                    break;
1354                                if (in_com || in_qstr)
1355                                    good = (*pos++ == *ptr);
1356                                else
1357                                    addspace = 1;
1358                                break;
1359                     case '=' :
1360                     case '/' :
1361                     case ';' : if (!(in_com || in_qstr))
1362                                    addspace = -1;
1363                                good = good && (*pos++ == *ptr);
1364                                break;
1365                     default  : if (!good)
1366                                    break;
1367                                if (addspace == 1)
1368                                    good = (*pos++ == ' ');
1369                                if (in_com || in_qstr)
1370                                    good = good && (*pos++ == *ptr);
1371                                else
1372                                    good = good && (*pos++ == apr_tolower(*ptr));
1373                                addspace = 0;
1374                                break;
1375                 }
1376             }
1377         }
1378         if (good && *pos)
1379             good = 0;          /* not good if only a prefix was matched */
1380
1381     } while (*ptr && !good);
1382
1383     return good;
1384 }
1385
1386
1387 /* Retrieve a token, spacing over it and returning a pointer to
1388  * the first non-white byte afterwards.  Note that these tokens
1389  * are delimited by semis and commas; and can also be delimited
1390  * by whitespace at the caller's option.
1391  */
1392
1393 AP_DECLARE(char *) ap_get_token(apr_pool_t *p, const char **accept_line,
1394                                 int accept_white)
1395 {
1396     const char *ptr = *accept_line;
1397     const char *tok_start;
1398     char *token;
1399     int tok_len;
1400
1401     /* Find first non-white byte */
1402
1403     while (*ptr && apr_isspace(*ptr))
1404         ++ptr;
1405
1406     tok_start = ptr;
1407
1408     /* find token end, skipping over quoted strings.
1409      * (comments are already gone).
1410      */
1411
1412     while (*ptr && (accept_white || !apr_isspace(*ptr))
1413            && *ptr != ';' && *ptr != ',') {
1414         if (*ptr++ == '"')
1415             while (*ptr)
1416                 if (*ptr++ == '"')
1417                     break;
1418     }
1419
1420     tok_len = ptr - tok_start;
1421     token = apr_pstrndup(p, tok_start, tok_len);
1422
1423     /* Advance accept_line pointer to the next non-white byte */
1424
1425     while (*ptr && apr_isspace(*ptr))
1426         ++ptr;
1427
1428     *accept_line = ptr;
1429     return token;
1430 }
1431
1432
1433 /* find http tokens, see the definition of token from RFC2068 */
1434 AP_DECLARE(int) ap_find_token(apr_pool_t *p, const char *line, const char *tok)
1435 {
1436     const unsigned char *start_token;
1437     const unsigned char *s;
1438
1439     if (!line)
1440         return 0;
1441
1442     s = (const unsigned char *)line;
1443     for (;;) {
1444         /* find start of token, skip all stop characters, note NUL
1445          * isn't a token stop, so we don't need to test for it
1446          */
1447         while (TEST_CHAR(*s, T_HTTP_TOKEN_STOP)) {
1448             ++s;
1449         }
1450         if (!*s) {
1451             return 0;
1452         }
1453         start_token = s;
1454         /* find end of the token */
1455         while (*s && !TEST_CHAR(*s, T_HTTP_TOKEN_STOP)) {
1456             ++s;
1457         }
1458         if (!strncasecmp((const char *)start_token, (const char *)tok,
1459                          s - start_token)) {
1460             return 1;
1461         }
1462         if (!*s) {
1463             return 0;
1464         }
1465     }
1466 }
1467
1468
1469 AP_DECLARE(int) ap_find_last_token(apr_pool_t *p, const char *line,
1470                                    const char *tok)
1471 {
1472     int llen, tlen, lidx;
1473
1474     if (!line)
1475         return 0;
1476
1477     llen = strlen(line);
1478     tlen = strlen(tok);
1479     lidx = llen - tlen;
1480
1481     if (lidx < 0 ||
1482         (lidx > 0 && !(apr_isspace(line[lidx - 1]) || line[lidx - 1] == ',')))
1483         return 0;
1484
1485     return (strncasecmp(&line[lidx], tok, tlen) == 0);
1486 }
1487
1488 AP_DECLARE(char *) ap_escape_shell_cmd(apr_pool_t *p, const char *str)
1489 {
1490     char *cmd;
1491     unsigned char *d;
1492     const unsigned char *s;
1493
1494     cmd = apr_palloc(p, 2 * strlen(str) + 1);        /* Be safe */
1495     d = (unsigned char *)cmd;
1496     s = (const unsigned char *)str;
1497     for (; *s; ++s) {
1498
1499 #if defined(OS2) || defined(WIN32)
1500         /*
1501          * Newlines to Win32/OS2 CreateProcess() are ill advised.
1502          * Convert them to spaces since they are effectively white
1503          * space to most applications
1504          */
1505         if (*s == '\r' || *s == '\n') {
1506              *d++ = ' ';
1507              continue;
1508          }
1509 #endif
1510
1511         if (TEST_CHAR(*s, T_ESCAPE_SHELL_CMD)) {
1512             *d++ = '\\';
1513         }
1514         *d++ = *s;
1515     }
1516     *d = '\0';
1517
1518     return cmd;
1519 }
1520
/*
 * Convert the two hexadecimal digits at what[0] and what[1] into the
 * byte they encode.  The caller is expected to have verified that both
 * characters are hex digits.  On EBCDIC builds the decoded value is
 * additionally passed through the ap_hdrs_from_ascii translation.
 */
static char x2c(const char *what)
{
    char value;

#if !APR_CHARSET_EBCDIC
    /* (c & 0xdf) folds lower case letters onto upper case */
    int hi = (what[0] >= 'A') ? ((what[0] & 0xdf) - 'A') + 10
                              : (what[0] - '0');
    int lo = (what[1] >= 'A') ? ((what[1] & 0xdf) - 'A') + 10
                              : (what[1] - '0');

    value = (char)(hi * 16 + lo);
#else /*APR_CHARSET_EBCDIC*/
    char xstr[5];

    xstr[0] = '0';
    xstr[1] = 'x';
    xstr[2] = what[0];
    xstr[3] = what[1];
    xstr[4] = '\0';
    value = apr_xlate_conv_byte(ap_hdrs_from_ascii,
                                0xFF & strtol(xstr, NULL, 16));
#endif /*APR_CHARSET_EBCDIC*/
    return value;
}
1543
1544 /*
1545  * Unescapes a URL, leaving reserved characters intact.
1546  * Returns 0 on success, non-zero on error
1547  * Failure is due to
1548  *   bad % escape       returns HTTP_BAD_REQUEST
1549  *
1550  *   decoding %00 or a forbidden character returns HTTP_NOT_FOUND
1551  */
1552
1553 static int unescape_url(char *url, const char *forbid, const char *reserved)
1554 {
1555     register int badesc, badpath;
1556     char *x, *y;
1557
1558     badesc = 0;
1559     badpath = 0;
1560     /* Initial scan for first '%'. Don't bother writing values before
1561      * seeing a '%' */
1562     y = strchr(url, '%');
1563     if (y == NULL) {
1564         return OK;
1565     }
1566     for (x = y; *y; ++x, ++y) {
1567         if (*y != '%') {
1568             *x = *y;
1569         }
1570         else {
1571             if (!apr_isxdigit(*(y + 1)) || !apr_isxdigit(*(y + 2))) {
1572                 badesc = 1;
1573                 *x = '%';
1574             }
1575             else {
1576                 char decoded;
1577                 decoded = x2c(y + 1);
1578                 if ((decoded == '\0')
1579                     || (forbid && ap_strchr_c(forbid, decoded))) {
1580                     badpath = 1;
1581                     *x = decoded;
1582                     y += 2;
1583                 }
1584                 else if (reserved && ap_strchr_c(reserved, decoded)) {
1585                     *x++ = *y++;
1586                     *x++ = *y++;
1587                     *x = *y;
1588                 }
1589                 else {
1590                     *x = decoded;
1591                     y += 2;
1592                 }
1593             }
1594         }
1595     }
1596     *x = '\0';
1597     if (badesc) {
1598         return HTTP_BAD_REQUEST;
1599     }
1600     else if (badpath) {
1601         return HTTP_NOT_FOUND;
1602     }
1603     else {
1604         return OK;
1605     }
1606 }
1607 AP_DECLARE(int) ap_unescape_url(char *url)
1608 {
1609     /* Traditional */
1610     return unescape_url(url, SLASHES, NULL);
1611 }
1612 AP_DECLARE(int) ap_unescape_url_keep2f(char *url, int decode_slashes)
1613 {
1614     /* AllowEncodedSlashes (corrected) */
1615     if (decode_slashes) {
1616         /* no chars reserved */
1617         return unescape_url(url, NULL, NULL);
1618     } else {
1619         /* reserve (do not decode) encoded slashes */
1620         return unescape_url(url, NULL, SLASHES);
1621     }
1622 }
#ifdef NEW_APIS
/* IFDEF these out until they've been thought through.
 * Just a germ of an API extension for now
 */

/*
 * Decode url in place but leave the escapes of '/', ';' and '?' (RFC1738
 * reserved characters) intact, so proxied URLs don't get mangled.
 * Where does that leave encoded '&' ?
 */
AP_DECLARE(int) ap_unescape_url_proxy(char *url)
{
    const char *keep_encoded = "/;?";

    return unescape_url(url, NULL, keep_encoded);
}

/*
 * Decode url in place, leaving the escapes of every character listed in
 * reserved untouched.
 */
AP_DECLARE(int) ap_unescape_url_reserved(char *url, const char *reserved)
{
    const char *keep_encoded = reserved;

    return unescape_url(url, NULL, keep_encoded);
}
#endif
1639
1640 AP_DECLARE(int) ap_unescape_urlencoded(char *query)
1641 {
1642     char *slider;
1643
1644     /* replace plus with a space */
1645     if (query) {
1646         for (slider = query; *slider; slider++) {
1647             if (*slider == '+') {
1648                 *slider = ' ';
1649             }
1650         }
1651     }
1652
1653     /* unescape everything else */
1654     return unescape_url(query, NULL, NULL);
1655 }
1656
1657 AP_DECLARE(char *) ap_construct_server(apr_pool_t *p, const char *hostname,
1658                                        apr_port_t port, const request_rec *r)
1659 {
1660     if (ap_is_default_port(port, r)) {
1661         return apr_pstrdup(p, hostname);
1662     }
1663     else {
1664         return apr_psprintf(p, "%s:%u", hostname, port);
1665     }
1666 }
1667
1668 AP_DECLARE(int) ap_unescape_all(char *url)
1669 {
1670     return unescape_url(url, NULL, NULL);
1671 }
1672
1673 /* c2x takes an unsigned, and expects the caller has guaranteed that
1674  * 0 <= what < 256... which usually means that you have to cast to
1675  * unsigned char first, because (unsigned)(char)(x) first goes through
1676  * signed extension to an int before the unsigned cast.
1677  *
1678  * The reason for this assumption is to assist gcc code generation --
1679  * the unsigned char -> unsigned extension is already done earlier in
1680  * both uses of this code, so there's no need to waste time doing it
1681  * again.
1682  */
1683 static const char c2x_table[] = "0123456789abcdef";
1684
1685 static APR_INLINE unsigned char *c2x(unsigned what, unsigned char prefix,
1686                                      unsigned char *where)
1687 {
1688 #if APR_CHARSET_EBCDIC
1689     what = apr_xlate_conv_byte(ap_hdrs_to_ascii, (unsigned char)what);
1690 #endif /*APR_CHARSET_EBCDIC*/
1691     *where++ = prefix;
1692     *where++ = c2x_table[what >> 4];
1693     *where++ = c2x_table[what & 0xf];
1694     return where;
1695 }
1696
1697 /*
1698  * escape_path_segment() escapes a path segment, as defined in RFC 1808. This
1699  * routine is (should be) OS independent.
1700  *
1701  * os_escape_path() converts an OS path to a URL, in an OS dependent way. In all
1702  * cases if a ':' occurs before the first '/' in the URL, the URL should be
1703  * prefixed with "./" (or the ':' escaped). In the case of Unix, this means
1704  * leaving '/' alone, but otherwise doing what escape_path_segment() does. For
1705  * efficiency reasons, we don't use escape_path_segment(), which is provided for
1706  * reference. Again, RFC 1808 is where this stuff is defined.
1707  *
1708  * If partial is set, os_escape_path() assumes that the path will be appended to
1709  * something with a '/' in it (and thus does not prefix "./").
1710  */
1711
1712 AP_DECLARE(char *) ap_escape_path_segment_buffer(char *copy, const char *segment)
1713 {
1714     const unsigned char *s = (const unsigned char *)segment;
1715     unsigned char *d = (unsigned char *)copy;
1716     unsigned c;
1717
1718     while ((c = *s)) {
1719         if (TEST_CHAR(c, T_ESCAPE_PATH_SEGMENT)) {
1720             d = c2x(c, '%', d);
1721         }
1722         else {
1723             *d++ = c;
1724         }
1725         ++s;
1726     }
1727     *d = '\0';
1728     return copy;
1729 }
1730
1731 AP_DECLARE(char *) ap_escape_path_segment(apr_pool_t *p, const char *segment)
1732 {
1733     return ap_escape_path_segment_buffer(apr_palloc(p, 3 * strlen(segment) + 1), segment);
1734 }
1735
1736 AP_DECLARE(char *) ap_os_escape_path(apr_pool_t *p, const char *path, int partial)
1737 {
1738     char *copy = apr_palloc(p, 3 * strlen(path) + 3);
1739     const unsigned char *s = (const unsigned char *)path;
1740     unsigned char *d = (unsigned char *)copy;
1741     unsigned c;
1742
1743     if (!partial) {
1744         const char *colon = ap_strchr_c(path, ':');
1745         const char *slash = ap_strchr_c(path, '/');
1746
1747         if (colon && (!slash || colon < slash)) {
1748             *d++ = '.';
1749             *d++ = '/';
1750         }
1751     }
1752     while ((c = *s)) {
1753         if (TEST_CHAR(c, T_OS_ESCAPE_PATH)) {
1754             d = c2x(c, '%', d);
1755         }
1756         else {
1757             *d++ = c;
1758         }
1759         ++s;
1760     }
1761     *d = '\0';
1762     return copy;
1763 }
1764
1765 AP_DECLARE(char *) ap_escape_urlencoded_buffer(char *copy, const char *buffer)
1766 {
1767     const unsigned char *s = (const unsigned char *)buffer;
1768     unsigned char *d = (unsigned char *)copy;
1769     unsigned c;
1770
1771     while ((c = *s)) {
1772         if (TEST_CHAR(c, T_ESCAPE_URLENCODED)) {
1773             d = c2x(c, '%', d);
1774         }
1775         else if (c == ' ') {
1776             *d++ = '+';
1777         }
1778         else {
1779             *d++ = c;
1780         }
1781         ++s;
1782     }
1783     *d = '\0';
1784     return copy;
1785 }
1786
1787 AP_DECLARE(char *) ap_escape_urlencoded(apr_pool_t *p, const char *buffer)
1788 {
1789     return ap_escape_urlencoded_buffer(apr_palloc(p, 3 * strlen(buffer) + 1), buffer);
1790 }
1791
1792 /* ap_escape_uri is now a macro for os_escape_path */
1793
1794 AP_DECLARE(char *) ap_escape_html2(apr_pool_t *p, const char *s, int toasc)
1795 {
1796     int i, j;
1797     char *x;
1798
1799     /* first, count the number of extra characters */
1800     for (i = 0, j = 0; s[i] != '\0'; i++)
1801         if (s[i] == '<' || s[i] == '>')
1802             j += 3;
1803         else if (s[i] == '&')
1804             j += 4;
1805         else if (s[i] == '"')
1806             j += 5;
1807         else if (toasc && !apr_isascii(s[i]))
1808             j += 5;
1809
1810     if (j == 0)
1811         return apr_pstrmemdup(p, s, i);
1812
1813     x = apr_palloc(p, i + j + 1);
1814     for (i = 0, j = 0; s[i] != '\0'; i++, j++)
1815         if (s[i] == '<') {
1816             memcpy(&x[j], "&lt;", 4);
1817             j += 3;
1818         }
1819         else if (s[i] == '>') {
1820             memcpy(&x[j], "&gt;", 4);
1821             j += 3;
1822         }
1823         else if (s[i] == '&') {
1824             memcpy(&x[j], "&amp;", 5);
1825             j += 4;
1826         }
1827         else if (s[i] == '"') {
1828             memcpy(&x[j], "&quot;", 6);
1829             j += 5;
1830         }
1831         else if (toasc && !apr_isascii(s[i])) {
1832             char *esc = apr_psprintf(p, "&#%3.3d;", (unsigned char)s[i]);
1833             memcpy(&x[j], esc, 6);
1834             j += 5;
1835         }
1836         else
1837             x[j] = s[i];
1838
1839     x[j] = '\0';
1840     return x;
1841 }
1842 AP_DECLARE(char *) ap_escape_logitem(apr_pool_t *p, const char *str)
1843 {
1844     char *ret;
1845     unsigned char *d;
1846     const unsigned char *s;
1847
1848     if (!str) {
1849         return NULL;
1850     }
1851
1852     ret = apr_palloc(p, 4 * strlen(str) + 1); /* Be safe */
1853     d = (unsigned char *)ret;
1854     s = (const unsigned char *)str;
1855     for (; *s; ++s) {
1856
1857         if (TEST_CHAR(*s, T_ESCAPE_LOGITEM)) {
1858             *d++ = '\\';
1859             switch(*s) {
1860             case '\b':
1861                 *d++ = 'b';
1862                 break;
1863             case '\n':
1864                 *d++ = 'n';
1865                 break;
1866             case '\r':
1867                 *d++ = 'r';
1868                 break;
1869             case '\t':
1870                 *d++ = 't';
1871                 break;
1872             case '\v':
1873                 *d++ = 'v';
1874                 break;
1875             case '\\':
1876             case '"':
1877                 *d++ = *s;
1878                 break;
1879             default:
1880                 c2x(*s, 'x', d);
1881                 d += 3;
1882             }
1883         }
1884         else {
1885             *d++ = *s;
1886         }
1887     }
1888     *d = '\0';
1889
1890     return ret;
1891 }
1892
/*
 * Escape source for the error log into the fixed-size buffer dest.
 *
 * dest   - output buffer
 * source - string to escape
 * buflen - size of dest in bytes, including room for the terminating NUL
 *
 * Bytes flagged T_ESCAPE_LOGITEM are written as backslash sequences
 * (\b \n \r \t \v \\ or \xhh), except for the double quote, which is
 * copied unescaped.  Output stops when dest is full; an escape sequence
 * is never written partially.
 *
 * Returns the number of bytes stored in dest, not counting the
 * terminating NUL.  If source is NULL or buflen is 0, returns 0 without
 * touching dest.
 */
AP_DECLARE(apr_size_t) ap_escape_errorlog_item(char *dest, const char *source,
                                               apr_size_t buflen)
{
    unsigned char *d, *ep;
    const unsigned char *s;

    if (!source || !buflen) { /* be safe */
        return 0;
    }

    d = (unsigned char *)dest;
    s = (const unsigned char *)source;
    /* ep marks the last byte of dest, reserved for the terminator */
    ep = d + buflen - 1;

    for (; d < ep && *s; ++s) {

        if (TEST_CHAR(*s, T_ESCAPE_LOGITEM)) {
            *d++ = '\\';
            if (d >= ep) {
                /* no room for the rest of the sequence: drop the backslash */
                --d;
                break;
            }

            switch(*s) {
            case '\b':
                *d++ = 'b';
                break;
            case '\n':
                *d++ = 'n';
                break;
            case '\r':
                *d++ = 'r';
                break;
            case '\t':
                *d++ = 't';
                break;
            case '\v':
                *d++ = 'v';
                break;
            case '\\':
                *d++ = *s;
                break;
            case '"': /* no need for this in error log */
                /* overwrite the backslash just written with the quote */
                d[-1] = *s;
                break;
            default:
                /* "xhh" needs three more bytes in front of ep */
                if (d >= ep - 2) {
                    ep = --d; /* break the for loop as well */
                    break;
                }
                c2x(*s, 'x', d);
                d += 3;
            }
        }
        else {
            *d++ = *s;
        }
    }
    *d = '\0';

    return (d - (unsigned char *)dest);
}
1955
1956 AP_DECLARE(int) ap_is_directory(apr_pool_t *p, const char *path)
1957 {
1958     apr_finfo_t finfo;
1959
1960     if (apr_stat(&finfo, path, APR_FINFO_TYPE, p) != APR_SUCCESS)
1961         return 0;                /* in error condition, just return no */
1962
1963     return (finfo.filetype == APR_DIR);
1964 }
1965
1966 AP_DECLARE(int) ap_is_rdirectory(apr_pool_t *p, const char *path)
1967 {
1968     apr_finfo_t finfo;
1969
1970     if (apr_stat(&finfo, path, APR_FINFO_LINK | APR_FINFO_TYPE, p) != APR_SUCCESS)
1971         return 0;                /* in error condition, just return no */
1972
1973     return (finfo.filetype == APR_DIR);
1974 }
1975
1976 AP_DECLARE(char *) ap_make_full_path(apr_pool_t *a, const char *src1,
1977                                   const char *src2)
1978 {
1979     apr_size_t len1, len2;
1980     char *path;
1981
1982     len1 = strlen(src1);
1983     len2 = strlen(src2);
1984      /* allocate +3 for '/' delimiter, trailing NULL and overallocate
1985       * one extra byte to allow the caller to add a trailing '/'
1986       */
1987     path = (char *)apr_palloc(a, len1 + len2 + 3);
1988     if (len1 == 0) {
1989         *path = '/';
1990         memcpy(path + 1, src2, len2 + 1);
1991     }
1992     else {
1993         char *next;
1994         memcpy(path, src1, len1);
1995         next = path + len1;
1996         if (next[-1] != '/') {
1997             *next++ = '/';
1998         }
1999         memcpy(next, src2, len2 + 1);
2000     }
2001     return path;
2002 }
2003
2004 /*
2005  * Check for an absoluteURI syntax (see section 3.2 in RFC2068).
2006  */
2007 AP_DECLARE(int) ap_is_url(const char *u)
2008 {
2009     register int x;
2010
2011     for (x = 0; u[x] != ':'; x++) {
2012         if ((!u[x]) ||
2013             ((!apr_isalpha(u[x])) && (!apr_isdigit(u[x])) &&
2014              (u[x] != '+') && (u[x] != '-') && (u[x] != '.'))) {
2015             return 0;
2016         }
2017     }
2018
2019     return (x ? 1 : 0);                /* If the first character is ':', it's broken, too */
2020 }
2021
2022 AP_DECLARE(int) ap_ind(const char *s, char c)
2023 {
2024     const char *p = ap_strchr_c(s, c);
2025
2026     if (p == NULL)
2027         return -1;
2028     return p - s;
2029 }
2030
2031 AP_DECLARE(int) ap_rind(const char *s, char c)
2032 {
2033     const char *p = ap_strrchr_c(s, c);
2034
2035     if (p == NULL)
2036         return -1;
2037     return p - s;
2038 }
2039
2040 AP_DECLARE(void) ap_str_tolower(char *str)
2041 {
2042     while (*str) {
2043         *str = apr_tolower(*str);
2044         ++str;
2045     }
2046 }
2047
2048 AP_DECLARE(void) ap_str_toupper(char *str)
2049 {
2050     while (*str) {
2051         *str = apr_toupper(*str);
2052         ++str;
2053     }
2054 }
2055
2056 /*
2057  * We must return a FQDN
2058  */
2059 char *ap_get_local_host(apr_pool_t *a)
2060 {
2061 #ifndef MAXHOSTNAMELEN
2062 #define MAXHOSTNAMELEN 256
2063 #endif
2064     char str[MAXHOSTNAMELEN + 1];
2065     char *server_hostname = NULL;
2066     apr_sockaddr_t *sockaddr;
2067     char *hostname;
2068
2069     if (apr_gethostname(str, sizeof(str) - 1, a) != APR_SUCCESS) {
2070         ap_log_perror(APLOG_MARK, APLOG_STARTUP | APLOG_WARNING, 0, a, APLOGNO(00556)
2071                      "%s: apr_gethostname() failed to determine ServerName",
2072                      ap_server_argv0);
2073     } else {
2074         str[sizeof(str) - 1] = '\0';
2075         if (apr_sockaddr_info_get(&sockaddr, str, APR_UNSPEC, 0, 0, a) == APR_SUCCESS) {
2076             if ( (apr_getnameinfo(&hostname, sockaddr, 0) == APR_SUCCESS) &&
2077                 (ap_strchr_c(hostname, '.')) ) {
2078                 server_hostname = apr_pstrdup(a, hostname);
2079                 return server_hostname;
2080             } else if (ap_strchr_c(str, '.')) {
2081                 server_hostname = apr_pstrdup(a, str);
2082             } else {
2083                 apr_sockaddr_ip_get(&hostname, sockaddr);
2084                 server_hostname = apr_pstrdup(a, hostname);
2085             }
2086         } else {
2087             ap_log_perror(APLOG_MARK, APLOG_STARTUP | APLOG_WARNING, 0, a, APLOGNO(00557)
2088                          "%s: apr_sockaddr_info_get() failed for %s",
2089                          ap_server_argv0, str);
2090         }
2091     }
2092
2093     if (!server_hostname)
2094         server_hostname = apr_pstrdup(a, "127.0.0.1");
2095
2096     ap_log_perror(APLOG_MARK, APLOG_ALERT|APLOG_STARTUP, 0, a, APLOGNO(00558)
2097                  "%s: Could not reliably determine the server's fully qualified "
2098                  "domain name, using %s. Set the 'ServerName' directive globally "
2099                  "to suppress this message",
2100                  ap_server_argv0, server_hostname);
2101
2102     return server_hostname;
2103 }
2104
2105 /* simple 'pool' alloc()ing glue to apr_base64.c
2106  */
2107 AP_DECLARE(char *) ap_pbase64decode(apr_pool_t *p, const char *bufcoded)
2108 {
2109     char *decoded;
2110     int l;
2111
2112     decoded = (char *) apr_palloc(p, 1 + apr_base64_decode_len(bufcoded));
2113     l = apr_base64_decode(decoded, bufcoded);
2114     decoded[l] = '\0'; /* make binary sequence into string */
2115
2116     return decoded;
2117 }
2118
2119 AP_DECLARE(char *) ap_pbase64encode(apr_pool_t *p, char *string)
2120 {
2121     char *encoded;
2122     int l = strlen(string);
2123
2124     encoded = (char *) apr_palloc(p, 1 + apr_base64_encode_len(l));
2125     l = apr_base64_encode(encoded, string, l);
2126     encoded[l] = '\0'; /* make binary sequence into string */
2127
2128     return encoded;
2129 }
2130
2131 /* we want to downcase the type/subtype for comparison purposes
2132  * but nothing else because ;parameter=foo values are case sensitive.
2133  * XXX: in truth we want to downcase parameter names... but really,
2134  * apache has never handled parameters and such correctly.  You
2135  * also need to compress spaces and such to be able to compare
2136  * properly. -djg
2137  */
2138 AP_DECLARE(void) ap_content_type_tolower(char *str)
2139 {
2140     char *semi;
2141
2142     semi = strchr(str, ';');
2143     if (semi) {
2144         *semi = '\0';
2145     }
2146
2147     ap_str_tolower(str);
2148
2149     if (semi) {
2150         *semi = ';';
2151     }
2152 }
2153
2154 /*
2155  * Given a string, replace any bare " with \" .
2156  */
2157 AP_DECLARE(char *) ap_escape_quotes(apr_pool_t *p, const char *instring)
2158 {
2159     int newlen = 0;
2160     const char *inchr = instring;
2161     char *outchr, *outstring;
2162
2163     /*
2164      * Look through the input string, jogging the length of the output
2165      * string up by an extra byte each time we find an unescaped ".
2166      */
2167     while (*inchr != '\0') {
2168         newlen++;
2169         if (*inchr == '"') {
2170             newlen++;
2171         }
2172         /*
2173          * If we find a slosh, and it's not the last byte in the string,
2174          * it's escaping something - advance past both bytes.
2175          */
2176         if ((*inchr == '\\') && (inchr[1] != '\0')) {
2177             inchr++;
2178             newlen++;
2179         }
2180         inchr++;
2181     }
2182     outstring = apr_palloc(p, newlen + 1);
2183     inchr = instring;
2184     outchr = outstring;
2185     /*
2186      * Now copy the input string to the output string, inserting a slosh
2187      * in front of every " that doesn't already have one.
2188      */
2189     while (*inchr != '\0') {
2190         if ((*inchr == '\\') && (inchr[1] != '\0')) {
2191             *outchr++ = *inchr++;
2192             *outchr++ = *inchr++;
2193         }
2194         if (*inchr == '"') {
2195             *outchr++ = '\\';
2196         }
2197         if (*inchr != '\0') {
2198             *outchr++ = *inchr++;
2199         }
2200     }
2201     *outchr = '\0';
2202     return outstring;
2203 }
2204
2205 /*
2206  * Given a string, append the PID deliminated by delim.
2207  * Usually used to create a pid-appended filepath name
2208  * (eg: /a/b/foo -> /a/b/foo.6726). A function, and not
2209  * a macro, to avoid unistd.h dependency
2210  */
2211 AP_DECLARE(char *) ap_append_pid(apr_pool_t *p, const char *string,
2212                                     const char *delim)
2213 {
2214     return apr_psprintf(p, "%s%s%" APR_PID_T_FMT, string,
2215                         delim, getpid());
2216
2217 }
2218
2219 /**
2220  * Parse a given timeout parameter string into an apr_interval_time_t value.
2221  * The unit of the time interval is given as postfix string to the numeric
2222  * string. Currently the following units are understood:
2223  *
2224  * ms    : milliseconds
2225  * s     : seconds
2226  * mi[n] : minutes
2227  * h     : hours
2228  *
2229  * If no unit is contained in the given timeout parameter the default_time_unit
2230  * will be used instead.
2231  * @param timeout_parameter The string containing the timeout parameter.
2232  * @param timeout The timeout value to be returned.
2233  * @param default_time_unit The default time unit to use if none is specified
2234  * in timeout_parameter.
2235  * @return Status value indicating whether the parsing was successful or not.
2236  */
2237 AP_DECLARE(apr_status_t) ap_timeout_parameter_parse(
2238                                                const char *timeout_parameter,
2239                                                apr_interval_time_t *timeout,
2240                                                const char *default_time_unit)
2241 {
2242     char *endp;
2243     const char *time_str;
2244     apr_int64_t tout;
2245
2246     tout = apr_strtoi64(timeout_parameter, &endp, 10);
2247     if (errno) {
2248         return errno;
2249     }
2250     if (!endp || !*endp) {
2251         time_str = default_time_unit;
2252     }
2253     else {
2254         time_str = endp;
2255     }
2256
2257     switch (*time_str) {
2258         /* Time is in seconds */
2259     case 's':
2260         *timeout = (apr_interval_time_t) apr_time_from_sec(tout);
2261         break;
2262     case 'h':
2263         /* Time is in hours */
2264         *timeout = (apr_interval_time_t) apr_time_from_sec(tout * 3600);
2265         break;
2266     case 'm':
2267         switch (*(++time_str)) {
2268         /* Time is in milliseconds */
2269         case 's':
2270             *timeout = (apr_interval_time_t) tout * 1000;
2271             break;
2272         /* Time is in minutes */
2273         case 'i':
2274             *timeout = (apr_interval_time_t) apr_time_from_sec(tout * 60);
2275             break;
2276         default:
2277             return APR_EGENERAL;
2278         }
2279         break;
2280     default:
2281         return APR_EGENERAL;
2282     }
2283     return APR_SUCCESS;
2284 }
2285
2286 /**
2287  * Determine if a request has a request body or not.
2288  *
2289  * @param r the request_rec of the request
2290  * @return truth value
2291  */
2292 AP_DECLARE(int) ap_request_has_body(request_rec *r)
2293 {
2294     apr_off_t cl;
2295     char *estr;
2296     const char *cls;
2297     int has_body;
2298
2299     has_body = (!r->header_only
2300                 && (r->kept_body
2301                     || apr_table_get(r->headers_in, "Transfer-Encoding")
2302                     || ( (cls = apr_table_get(r->headers_in, "Content-Length"))
2303                         && (apr_strtoff(&cl, cls, &estr, 10) == APR_SUCCESS)
2304                         && (!*estr)
2305                         && (cl > 0) )
2306                     )
2307                 );
2308     return has_body;
2309 }
2310
2311 AP_DECLARE_NONSTD(apr_status_t) ap_pool_cleanup_set_null(void *data_)
2312 {
2313     void **ptr = (void **)data_;
2314     *ptr = NULL;
2315     return APR_SUCCESS;
2316 }
2317
2318 AP_DECLARE(apr_status_t) ap_str2_alnum(const char *src, char *dest) {
2319
2320     for ( ; *src; src++, dest++)
2321     {
2322         if (!apr_isprint(*src))
2323             *dest = 'x';
2324         else if (!apr_isalnum(*src))
2325             *dest = '_';
2326         else
2327             *dest = (char)*src;
2328     }
2329     *dest = '\0';
2330     return APR_SUCCESS;
2331
2332 }
2333
2334 AP_DECLARE(apr_status_t) ap_pstr2_alnum(apr_pool_t *p, const char *src,
2335                                         const char **dest)
2336 {
2337     char *new = apr_palloc(p, strlen(src)+1);
2338     if (!new)
2339         return APR_ENOMEM;
2340     *dest = new;
2341     return ap_str2_alnum(src, new);
2342 }
2343
2344 /**
2345  * Read the body and parse any form found, which must be of the
2346  * type application/x-www-form-urlencoded.
2347  *
2348  * Name/value pairs are returned in an array, with the names as
2349  * strings with a maximum length of HUGE_STRING_LEN, and the
2350  * values as bucket brigades. This allows values to be arbitrarily
2351  * large.
2352  *
2353  * All url-encoding is removed from both the names and the values
2354  * on the fly. The names are interpreted as strings, while the
2355  * values are interpreted as blocks of binary data, that may
2356  * contain the 0 character.
2357  *
2358  * In order to ensure that resource limits are not exceeded, a
2359  * maximum size must be provided. If the sum of the lengths of
2360  * the names and the values exceed this size, this function
2361  * will return HTTP_REQUEST_ENTITY_TOO_LARGE.
2362  *
2363  * An optional number of parameters can be provided, if the number
2364  * of parameters provided exceeds this amount, this function will
2365  * return HTTP_REQUEST_ENTITY_TOO_LARGE. If this value is negative,
2366  * no limit is imposed, and the number of parameters is in turn
2367  * constrained by the size parameter above.
2368  *
2369  * This function honours any kept_body configuration, and the
2370  * original raw request body will be saved to the kept_body brigade
2371  * if so configured, just as ap_discard_request_body does.
2372  *
 * NOTE: File upload is not yet supported, but could be added without
 * changing the function signature.
2375  */
2376
/* States used by ap_parse_form_data() while scanning a form body.
 * The same type tracks both the name/value state and the state of
 * %XX decoding.
 */
typedef enum {
    FORM_NORMAL = 0,    /* not inside a %XX escape */
    FORM_AMP,           /* an '&' was just read: finish the current pair */
    FORM_NAME,          /* collecting the characters of a name */
    FORM_VALUE,         /* collecting the characters of a value */
    FORM_PERCENTA,      /* '%' seen, next character is the high hex digit */
    FORM_PERCENTB,      /* high digit seen, next character is the low digit */
    FORM_ABORT          /* a name outgrew the buffer: stop parsing */
} ap_form_type_t;
2387
2388 AP_DECLARE(int) ap_parse_form_data(request_rec *r, ap_filter_t *f,
2389                                    apr_array_header_t **ptr,
2390                                    apr_size_t num, apr_size_t usize)
2391 {
2392     apr_bucket_brigade *bb = NULL;
2393     int seen_eos = 0;
2394     char buffer[HUGE_STRING_LEN + 1];
2395     const char *ct;
2396     apr_size_t offset = 0;
2397     apr_ssize_t size;
2398     ap_form_type_t state = FORM_NAME, percent = FORM_NORMAL;
2399     ap_form_pair_t *pair = NULL;
2400     apr_array_header_t *pairs = apr_array_make(r->pool, 4, sizeof(ap_form_pair_t));
2401
2402     char hi = 0;
2403     char low = 0;
2404
2405     *ptr = pairs;
2406
2407     /* sanity check - we only support forms for now */
2408     ct = apr_table_get(r->headers_in, "Content-Type");
2409     if (!ct || strcmp("application/x-www-form-urlencoded", ct)) {
2410         return ap_discard_request_body(r);
2411     }
2412
2413     if (usize > APR_SIZE_MAX >> 1)
2414         size = APR_SIZE_MAX >> 1;
2415     else
2416         size = usize;
2417
2418     if (!f) {
2419         f = r->input_filters;
2420     }
2421
2422     bb = apr_brigade_create(r->pool, r->connection->bucket_alloc);
2423     do {
2424         apr_bucket *bucket = NULL, *last = NULL;
2425
2426         int rv = ap_get_brigade(f, bb, AP_MODE_READBYTES,
2427                                 APR_BLOCK_READ, HUGE_STRING_LEN);
2428         if (rv != APR_SUCCESS) {
2429             apr_brigade_destroy(bb);
2430             return (rv == AP_FILTER_ERROR) ? rv : HTTP_BAD_REQUEST;
2431         }
2432
2433         for (bucket = APR_BRIGADE_FIRST(bb);
2434              bucket != APR_BRIGADE_SENTINEL(bb);
2435              last = bucket, bucket = APR_BUCKET_NEXT(bucket)) {
2436             const char *data;
2437             apr_size_t len, slide;
2438
2439             if (last) {
2440                 apr_bucket_delete(last);
2441             }
2442             if (APR_BUCKET_IS_EOS(bucket)) {
2443                 seen_eos = 1;
2444                 break;
2445             }
2446             if (bucket->length == 0) {
2447                 continue;
2448             }
2449
2450             rv = apr_bucket_read(bucket, &data, &len, APR_BLOCK_READ);
2451             if (rv != APR_SUCCESS) {
2452                 apr_brigade_destroy(bb);
2453                 return HTTP_BAD_REQUEST;
2454             }
2455
2456             slide = len;
2457             while (state != FORM_ABORT && slide-- > 0 && size >= 0 && num != 0) {
2458                 char c = *data++;
2459                 if ('+' == c) {
2460                     c = ' ';
2461                 }
2462                 else if ('&' == c) {
2463                     state = FORM_AMP;
2464                 }
2465                 if ('%' == c) {
2466                     percent = FORM_PERCENTA;
2467                     continue;
2468                 }
2469                 if (FORM_PERCENTA == percent) {
2470                     if (c >= 'a') {
2471                         hi = c - 'a' + 10;
2472                     }
2473                     else if (c >= 'A') {
2474                         hi = c - 'A' + 10;
2475                     }
2476                     else if (c >= '0') {
2477                         hi = c - '0';
2478                     }
2479                     hi = hi << 4;
2480                     percent = FORM_PERCENTB;
2481                     continue;
2482                 }
2483                 if (FORM_PERCENTB == percent) {
2484                     if (c >= 'a') {
2485                         low = c - 'a' + 10;
2486                     }
2487                     else if (c >= 'A') {
2488                         low = c - 'A' + 10;
2489                     }
2490                     else if (c >= '0') {
2491                         low = c - '0';
2492                     }
2493                     c = low | hi;
2494                     percent = FORM_NORMAL;
2495                 }
2496                 switch (state) {
2497                     case FORM_AMP:
2498                         if (pair) {
2499                             const char *tmp = apr_pmemdup(r->pool, buffer, offset);
2500                             apr_bucket *b = apr_bucket_pool_create(tmp, offset, r->pool, r->connection->bucket_alloc);
2501                             APR_BRIGADE_INSERT_TAIL(pair->value, b);
2502                         }
2503                         state = FORM_NAME;
2504                         pair = NULL;
2505                         offset = 0;
2506                         num--;
2507                         break;
2508                     case FORM_NAME:
2509                         if (offset < HUGE_STRING_LEN) {
2510                             if ('=' == c) {
2511                                 buffer[offset] = 0;
2512                                 offset = 0;
2513                                 pair = (ap_form_pair_t *) apr_array_push(pairs);
2514                                 pair->name = apr_pstrdup(r->pool, buffer);
2515                                 pair->value = apr_brigade_create(r->pool, r->connection->bucket_alloc);
2516                                 state = FORM_VALUE;
2517                             }
2518                             else {
2519                                 buffer[offset++] = c;
2520                                 size--;
2521                             }
2522                         }
2523                         else {
2524                             state = FORM_ABORT;
2525                         }
2526                         break;
2527                     case FORM_VALUE:
2528                         if (offset >= HUGE_STRING_LEN) {
2529                             const char *tmp = apr_pmemdup(r->pool, buffer, offset);
2530                             apr_bucket *b = apr_bucket_pool_create(tmp, offset, r->pool, r->connection->bucket_alloc);
2531                             APR_BRIGADE_INSERT_TAIL(pair->value, b);
2532                             offset = 0;
2533                         }
2534                         buffer[offset++] = c;
2535                         size--;
2536                         break;
2537                     default:
2538                         break;
2539                 }
2540             }
2541
2542         }
2543
2544         apr_brigade_cleanup(bb);
2545     } while (!seen_eos);
2546
2547     if (FORM_ABORT == state || size < 0 || num == 0) {
2548         return HTTP_REQUEST_ENTITY_TOO_LARGE;
2549     }
2550     else if (FORM_VALUE == state && pair && offset > 0) {
2551         const char *tmp = apr_pmemdup(r->pool, buffer, offset);
2552         apr_bucket *b = apr_bucket_pool_create(tmp, offset, r->pool, r->connection->bucket_alloc);
2553         APR_BRIGADE_INSERT_TAIL(pair->value, b);
2554     }
2555
2556     return OK;
2557
2558 }
2559
2560 #define VARBUF_SMALL_SIZE 2048
2561 #define VARBUF_MAX_SIZE   (APR_SIZE_MAX - 1 -                                \
2562                            APR_ALIGN_DEFAULT(sizeof(struct ap_varbuf_info)))
2563
2564 struct ap_varbuf_info {
2565     struct apr_memnode_t *node;
2566     apr_allocator_t *allocator;
2567 };
2568
2569 static apr_status_t varbuf_cleanup(void *info_)
2570 {
2571     struct ap_varbuf_info *info = info_;
2572     info->node->next = NULL;
2573     apr_allocator_free(info->allocator, info->node);
2574     return APR_SUCCESS;
2575 }
2576
/* Shared read-only empty string that a varbuf points at while it owns no
 * memory (avail == 0).  Both objects have internal linkage so that the
 * generic name "nul" is not exported from this file.
 */
static const char nul = '\0';
static char * const varbuf_empty = (char *)&nul;
2579
2580 AP_DECLARE(void) ap_varbuf_init(apr_pool_t *p, struct ap_varbuf *vb,
2581                                 apr_size_t init_size)
2582 {
2583     vb->buf = varbuf_empty;
2584     vb->avail = 0;
2585     vb->strlen = AP_VARBUF_UNKNOWN;
2586     vb->pool = p;
2587     vb->info = NULL;
2588
2589     ap_varbuf_grow(vb, init_size);
2590 }
2591
2592 AP_DECLARE(void) ap_varbuf_grow(struct ap_varbuf *vb, apr_size_t new_len)
2593 {
2594     apr_memnode_t *new_node = NULL;
2595     apr_allocator_t *allocator;
2596     struct ap_varbuf_info *new_info;
2597     char *new;
2598
2599     AP_DEBUG_ASSERT(vb->strlen == AP_VARBUF_UNKNOWN || vb->avail >= vb->strlen);
2600
2601     if (new_len <= vb->avail)
2602         return;
2603
2604     if (new_len < 2 * vb->avail && vb->avail < VARBUF_MAX_SIZE/2) {
2605         /* at least double the size, to avoid repeated reallocations */
2606         new_len = 2 * vb->avail;
2607     }
2608     else if (new_len > VARBUF_MAX_SIZE) {
2609         apr_abortfunc_t abort_fn = apr_pool_abort_get(vb->pool);
2610         ap_assert(abort_fn != NULL);
2611         abort_fn(APR_ENOMEM);
2612         return;
2613     }
2614
2615     new_len++;  /* add space for trailing \0 */
2616     if (new_len <= VARBUF_SMALL_SIZE) {
2617         new_len = APR_ALIGN_DEFAULT(new_len);
2618         new = apr_palloc(vb->pool, new_len);
2619         if (vb->avail && vb->strlen != 0) {
2620             AP_DEBUG_ASSERT(vb->buf != NULL);
2621             AP_DEBUG_ASSERT(vb->buf != varbuf_empty);
2622             if (new == vb->buf + vb->avail + 1) {
2623                 /* We are lucky: the new memory lies directly after our old
2624                  * buffer, we can now use both.
2625                  */
2626                 vb->avail += new_len;
2627                 return;
2628             }
2629             else {
2630                 /* copy up to vb->strlen + 1 bytes */
2631                 memcpy(new, vb->buf, vb->strlen == AP_VARBUF_UNKNOWN ?
2632                                      vb->avail + 1 : vb->strlen + 1);
2633             }
2634         }
2635         else {
2636             *new = '\0';
2637         }
2638         vb->avail = new_len - 1;
2639         vb->buf = new;
2640         return;
2641     }
2642
2643     /* The required block is rather larger. Use allocator directly so that
2644      * the memory can be freed independently from the pool. */
2645     allocator = apr_pool_allocator_get(vb->pool);
2646     if (new_len <= VARBUF_MAX_SIZE)
2647         new_node = apr_allocator_alloc(allocator,
2648                                        new_len + APR_ALIGN_DEFAULT(sizeof(*new_info)));
2649     if (!new_node) {
2650         apr_abortfunc_t abort_fn = apr_pool_abort_get(vb->pool);
2651         ap_assert(abort_fn != NULL);
2652         abort_fn(APR_ENOMEM);
2653         return;
2654     }
2655     new_info = (struct ap_varbuf_info *)new_node->first_avail;
2656     new_node->first_avail += APR_ALIGN_DEFAULT(sizeof(*new_info));
2657     new_info->node = new_node;
2658     new_info->allocator = allocator;
2659     new = new_node->first_avail;
2660     AP_DEBUG_ASSERT(new_node->endp - new_node->first_avail >= new_len);
2661     new_len = new_node->endp - new_node->first_avail;
2662
2663     if (vb->avail && vb->strlen != 0)
2664         memcpy(new, vb->buf, vb->strlen == AP_VARBUF_UNKNOWN ?
2665                              vb->avail + 1 : vb->strlen + 1);
2666     else
2667         *new = '\0';
2668     if (vb->info)
2669         apr_pool_cleanup_run(vb->pool, vb->info, varbuf_cleanup);
2670     apr_pool_cleanup_register(vb->pool, new_info, varbuf_cleanup,
2671                               apr_pool_cleanup_null);
2672     vb->info = new_info;
2673     vb->buf = new;
2674     vb->avail = new_len - 1;
2675 }
2676
2677 AP_DECLARE(void) ap_varbuf_strmemcat(struct ap_varbuf *vb, const char *str,
2678                                      int len)
2679 {
2680     if (len == 0)
2681         return;
2682     if (!vb->avail) {
2683         ap_varbuf_grow(vb, len);
2684         memcpy(vb->buf, str, len);
2685         vb->buf[len] = '\0';
2686         vb->strlen = len;
2687         return;
2688     }
2689     if (vb->strlen == AP_VARBUF_UNKNOWN)
2690         vb->strlen = strlen(vb->buf);
2691     ap_varbuf_grow(vb, vb->strlen + len);
2692     memcpy(vb->buf + vb->strlen, str, len);
2693     vb->strlen += len;
2694     vb->buf[vb->strlen] = '\0';
2695 }
2696
2697 AP_DECLARE(void) ap_varbuf_free(struct ap_varbuf *vb)
2698 {
2699     if (vb->info) {
2700         apr_pool_cleanup_run(vb->pool, vb->info, varbuf_cleanup);
2701         vb->info = NULL;
2702     }
2703     vb->buf = NULL;
2704 }
2705
2706 AP_DECLARE(char *) ap_varbuf_pdup(apr_pool_t *p, struct ap_varbuf *buf,
2707                                   const char *prepend, apr_size_t prepend_len,
2708                                   const char *append, apr_size_t append_len,
2709                                   apr_size_t *new_len)
2710 {
2711     apr_size_t i = 0;
2712     struct iovec vec[3];
2713
2714     if (prepend) {
2715         vec[i].iov_base = (void *)prepend;
2716         vec[i].iov_len = prepend_len;
2717         i++;
2718     }
2719     if (buf->avail && buf->strlen) {
2720         if (buf->strlen == AP_VARBUF_UNKNOWN)
2721             buf->strlen = strlen(buf->buf);
2722         vec[i].iov_base = (void *)buf->buf;
2723         vec[i].iov_len = buf->strlen;
2724         i++;
2725     }
2726     if (append) {
2727         vec[i].iov_base = (void *)append;
2728         vec[i].iov_len = append_len;
2729         i++;
2730     }
2731     if (i)
2732         return apr_pstrcatv(p, vec, i, new_len);
2733
2734     if (new_len)
2735         *new_len = 0;
2736     return "";
2737 }
2738
2739 AP_DECLARE(apr_status_t) ap_varbuf_regsub(struct ap_varbuf *vb,
2740                                           const char *input,
2741                                           const char *source,
2742                                           apr_size_t nmatch,
2743                                           ap_regmatch_t pmatch[],
2744                                           apr_size_t maxlen)
2745 {
2746     return regsub_core(NULL, NULL, vb, input, source, nmatch, pmatch, maxlen);
2747 }
2748
2749 static const char * const oom_message = "[crit] Memory allocation failed, "
2750                                         "aborting process." APR_EOL_STR;
2751
2752 AP_DECLARE(void) ap_abort_on_oom()
2753 {
2754     int written, count = strlen(oom_message);
2755     const char *buf = oom_message;
2756     do {
2757         written = write(STDERR_FILENO, buf, count);
2758         if (written == count)
2759             break;
2760         if (written > 0) {
2761             buf += written;
2762             count -= written;
2763         }
2764     } while (written >= 0 || errno == EINTR);
2765     abort();
2766 }
2767
2768 AP_DECLARE(void *) ap_malloc(size_t size)
2769 {
2770     void *p = malloc(size);
2771     if (p == NULL && size != 0)
2772         ap_abort_on_oom();
2773     return p;
2774 }
2775
2776 AP_DECLARE(void *) ap_calloc(size_t nelem, size_t size)
2777 {
2778     void *p = calloc(nelem, size);
2779     if (p == NULL && nelem != 0 && size != 0)
2780         ap_abort_on_oom();
2781     return p;
2782 }
2783
2784 AP_DECLARE(void *) ap_realloc(void *ptr, size_t size)
2785 {
2786     void *p = realloc(ptr, size);
2787     if (p == NULL && size != 0)
2788         ap_abort_on_oom();
2789     return p;
2790 }