granicus.if.org Git - apache/blob - server/util.c

   1 /* ====================================================================
   2  * The Apache Software License, Version 1.1
   3  *
   4  * Copyright (c) 2000-2002 The Apache Software Foundation.  All rights
   5  * reserved.
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions
   9  * are met:
  10  *
  11  * 1. Redistributions of source code must retain the above copyright
  12  *    notice, this list of conditions and the following disclaimer.
  13  *
  14  * 2. Redistributions in binary form must reproduce the above copyright
  15  *    notice, this list of conditions and the following disclaimer in
  16  *    the documentation and/or other materials provided with the
  17  *    distribution.
  18  *
  19  * 3. The end-user documentation included with the redistribution,
  20  *    if any, must include the following acknowledgment:
  21  *       "This product includes software developed by the
  22  *        Apache Software Foundation (http://www.apache.org/)."
  23  *    Alternately, this acknowledgment may appear in the software itself,
  24  *    if and wherever such third-party acknowledgments normally appear.
  25  *
  26  * 4. The names "Apache" and "Apache Software Foundation" must
  27  *    not be used to endorse or promote products derived from this
  28  *    software without prior written permission. For written
  29  *    permission, please contact apache@apache.org.
  30  *
  31  * 5. Products derived from this software may not be called "Apache",
  32  *    nor may "Apache" appear in their name, without prior written
  33  *    permission of the Apache Software Foundation.
  34  *
  35  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  36  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  37  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  38  * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  39  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  40  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  41  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  42  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  43  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  44  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  45  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  46  * SUCH DAMAGE.
  47  * ====================================================================
  48  *
  49  * This software consists of voluntary contributions made by many
  50  * individuals on behalf of the Apache Software Foundation.  For more
  51  * information on the Apache Software Foundation, please see
  52  * <http://www.apache.org/>.
  53  *
  54  * Portions of this software are based upon public domain software
  55  * originally written at the National Center for Supercomputing Applications,
  56  * University of Illinois, Urbana-Champaign.
  57  */
  58
  59 /*
  60  * util.c: string utility things
  61  *
  62  * 3/21/93 Rob McCool
  63  * 1995-96 Many changes by the Apache Software Foundation
  64  *
  65  */
  66
  67 /* Debugging aid:
  68  * #define DEBUG            to trace all cfg_open*()/cfg_closefile() calls
  69  * #define DEBUG_CFG_LINES  to trace every line read from the config files
  70  */
  71
  72 #include "apr.h"
  73 #include "apr_strings.h"
  74 #include "apr_lib.h"
  75
  76 #define APR_WANT_STDIO
  77 #define APR_WANT_STRFUNC
  78 #include "apr_want.h"
  79
  80 #if APR_HAVE_UNISTD_H
  81 #include <unistd.h>
  82 #endif
  83 #if APR_HAVE_NETDB_H
  84 #include <netdb.h>              /* for gethostbyname() */
  85 #endif
  86
  87 #define CORE_PRIVATE
  88
  89 #include "ap_config.h"
  90 #include "apr_base64.h"
  91 #include "httpd.h"
  92 #include "http_main.h"
  93 #include "http_log.h"
  94 #include "http_protocol.h"
  95 #include "http_config.h"
  96 #include "util_ebcdic.h"
  97
  98 #ifdef HAVE_PWD_H
  99 #include <pwd.h>
 100 #endif
 101 #ifdef HAVE_GRP_H
 102 #include <grp.h>
 103 #endif
 104
 105 /* A bunch of functions in util.c scan strings looking for certain characters.
 106  * To make that more efficient we encode a lookup table.  The test_char_table
 107  * is generated automatically by gen_test_char.c.
 108  */
 109 #include "test_char.h"
 110
 111 /* we assume the folks using this ensure 0 <= c < 256... which means
 112  * you need a cast to (unsigned char) first, you can't just plug a
 113  * char in here and get it to work, because if char is signed then it
 114  * will first be sign extended.
 115  */
 116 #define TEST_CHAR(c, f)        (test_char_table[(unsigned)(c)] & (f))
 117
 118 /* Win32/NetWare/OS2 need to check for both forward and back slashes
 119  * in ap_getparents() and ap_escape_url.
 120  */
 121 #ifdef CASE_BLIND_FILESYSTEM
 122 #define IS_SLASH(s) ((s == '/') || (s == '\\'))
 123 #else
 124 #define IS_SLASH(s) (s == '/')
 125 #endif
 126
 127
 128 /*
 129  * Examine a field value (such as a media-/content-type) string and return
 130  * it sans any parameters; e.g., strip off any ';charset=foo' and the like.
 131  */
 132 AP_DECLARE(char *) ap_field_noparam(apr_pool_t *p, const char *intype)
 133 {
 134     const char *semi;
 135
 136     if (intype == NULL) return NULL;
 137
 138     semi = ap_strchr_c(intype, ';');
 139     if (semi == NULL) {
 140         return apr_pstrdup(p, intype);
 141     }
 142     else {
 143         while ((semi > intype) && apr_isspace(semi[-1])) {
 144             semi--;
 145         }
 146         return apr_pstrndup(p, intype, semi - intype);
 147     }
 148 }
 149
 150 AP_DECLARE(char *) ap_ht_time(apr_pool_t *p, apr_time_t t, const char *fmt,
 151                               int gmt)
 152 {
 153     apr_size_t retcode;
 154     char ts[MAX_STRING_LEN];
 155     char tf[MAX_STRING_LEN];
 156     apr_time_exp_t xt;
 157
 158     if (gmt) {
 159         const char *f;
 160         char *strp;
 161
 162         apr_time_exp_gmt(&xt, t);
 163         /* Convert %Z to "GMT" and %z to "+0000";
 164          * on hosts that do not have a time zone string in struct tm,
 165          * strftime must assume its argument is local time.
 166          */
 167         for(strp = tf, f = fmt; strp < tf + sizeof(tf) - 6 && (*strp = *f)
 168             ; f++, strp++) {
 169             if (*f != '%') continue;
 170             switch (f[1]) {
 171             case '%':
 172                 *++strp = *++f;
 173                 break;
 174             case 'Z':
 175                 *strp++ = 'G';
 176                 *strp++ = 'M';
 177                 *strp = 'T';
 178                 f++;
 179                 break;
 180             case 'z': /* common extension */
 181                 *strp++ = '+';
 182                 *strp++ = '0';
 183                 *strp++ = '0';
 184                 *strp++ = '0';
 185                 *strp = '0';
 186                 f++;
 187                 break;
 188             }
 189         }
 190         *strp = '\0';
 191         fmt = tf;
 192     }
 193     else {
 194         apr_time_exp_lt(&xt, t);
 195     }
 196
 197     /* check return code? */
 198     apr_strftime(ts, &retcode, MAX_STRING_LEN, fmt, &xt);
 199     ts[MAX_STRING_LEN - 1] = '\0';
 200     return apr_pstrdup(p, ts);
 201 }
 202
 203 /* Roy owes Rob beer. */
 204 /* Rob owes Roy dinner. */
 205
 206 /* These legacy comments would make a lot more sense if Roy hadn't
 207  * replaced the old later_than() routine with util_date.c.
 208  *
 209  * Well, okay, they still wouldn't make any sense.
 210  */
 211
 212 /* Match = 0, NoMatch = 1, Abort = -1
 213  * Based loosely on sections of wildmat.c by Rich Salz
 214  * Hmmm... shouldn't this really go component by component?
 215  */
 216 AP_DECLARE(int) ap_strcmp_match(const char *str, const char *exp)
 217 {
 218     int x, y;
 219
 220     for (x = 0, y = 0; exp[y]; ++y, ++x) {
 221         if ((!str[x]) && (exp[y] != '*'))
 222             return -1;
 223         if (exp[y] == '*') {
 224             while (exp[++y] == '*');
 225             if (!exp[y])
 226                 return 0;
 227             while (str[x]) {
 228                 int ret;
 229                 if ((ret = ap_strcmp_match(&str[x++], &exp[y])) != 1)
 230                     return ret;
 231             }
 232             return -1;
 233         }
 234         else if ((exp[y] != '?') && (str[x] != exp[y]))
 235             return 1;
 236     }
 237     return (str[x] != '\0');
 238 }
 239
 240 AP_DECLARE(int) ap_strcasecmp_match(const char *str, const char *exp)
 241 {
 242     int x, y;
 243
 244     for (x = 0, y = 0; exp[y]; ++y, ++x) {
 245         if (!str[x] && exp[y] != '*')
 246             return -1;
 247         if (exp[y] == '*') {
 248             while (exp[++y] == '*');
 249             if (!exp[y])
 250                 return 0;
 251             while (str[x]) {
 252                 int ret;
 253                 if ((ret = ap_strcasecmp_match(&str[x++], &exp[y])) != 1)
 254                     return ret;
 255             }
 256             return -1;
 257         }
 258         else if (exp[y] != '?'
 259                  && apr_tolower(str[x]) != apr_tolower(exp[y]))
 260             return 1;
 261     }
 262     return (str[x] != '\0');
 263 }
 264
 265 /* We actually compare the canonical root to this root, (but we don't
 266  * waste time checking the case), since every use of this function in
 267  * httpd-2.0 tests if the path is 'proper', meaning we've already passed
 268  * it through apr_filepath_merge, or we haven't.
 269  */
 270 AP_DECLARE(int) ap_os_is_path_absolute(apr_pool_t *p, const char *dir)
 271 {
 272     const char *newpath;
 273     const char *ourdir = dir;
 274     if (apr_filepath_root(&newpath, &dir, 0, p) != APR_SUCCESS
 275             || strncmp(newpath, ourdir, strlen(newpath)) != 0) {
 276         return 0;
 277     }
 278     return 1;
 279 }
 280
 281 AP_DECLARE(int) ap_is_matchexp(const char *str)
 282 {
 283     register int x;
 284
 285     for (x = 0; str[x]; x++)
 286         if ((str[x] == '*') || (str[x] == '?'))
 287             return 1;
 288     return 0;
 289 }
 290
 291 /*
 292  * Here's a pool-based interface to POSIX regex's regcomp().
 293  * Note that we return regex_t instead of being passed one.
 294  * The reason is that if you use an already-used regex_t structure,
 295  * the memory that you've already allocated gets forgotten, and
 296  * regfree() doesn't clear it. So we don't allow it.
 297  */
 298
 299 static apr_status_t regex_cleanup(void *preg)
 300 {
 301     regfree((regex_t *) preg);
 302     return APR_SUCCESS;
 303 }
 304
 305 AP_DECLARE(regex_t *) ap_pregcomp(apr_pool_t *p, const char *pattern,
 306                                    int cflags)
 307 {
 308     regex_t *preg = apr_palloc(p, sizeof(regex_t));
 309
 310     if (regcomp(preg, pattern, cflags)) {
 311         return NULL;
 312     }
 313
 314     apr_pool_cleanup_register(p, (void *) preg, regex_cleanup, regex_cleanup);
 315
 316     return preg;
 317 }
 318
 319 AP_DECLARE(void) ap_pregfree(apr_pool_t *p, regex_t * reg)
 320 {
 321     regfree(reg);
 322     apr_pool_cleanup_kill(p, (void *) reg, regex_cleanup);
 323 }
 324
 325 /*
 326  * Similar to standard strstr() but we ignore case in this version.
 327  * Based on the strstr() implementation further below.
 328  */
 329 AP_DECLARE(char *) ap_strcasestr(const char *s1, const char *s2)
 330 {
 331     char *p1, *p2;
 332     if (*s2 == '\0') {
 333         /* an empty s2 */
 334         return((char *)s1);
 335     }
 336     while(1) {
 337         for ( ; (*s1 != '\0') && (apr_tolower(*s1) != apr_tolower(*s2)); s1++);
 338         if (*s1 == '\0') {
 339             return(NULL);
 340         }
 341         /* found first character of s2, see if the rest matches */
 342         p1 = (char *)s1;
 343         p2 = (char *)s2;
 344         for (++p1, ++p2; apr_tolower(*p1) == apr_tolower(*p2); ++p1, ++p2) {
 345             if (*p1 == '\0') {
 346                 /* both strings ended together */
 347                 return((char *)s1);
 348             }
 349         }
 350         if (*p2 == '\0') {
 351             /* second string ended, a match */
 352             break;
 353         }
 354         /* didn't find a match here, try starting at next character in s1 */
 355         s1++;
 356     }
 357     return((char *)s1);
 358 }
 359
 360 /*
  361  * Returns a pointer into bigstring that points immediately after
 362  * prefix. Returns bigstring if bigstring doesn't start with
 363  * prefix or if prefix is longer than bigstring while still matching.
 364  * NOTE: pointer returned is relative to bigstring, so we
 365  * can use standard pointer comparisons in the calling function
 366  * (eg: test if ap_stripprefix(a,b) == a)
 367  */
 368 AP_DECLARE(const char *) ap_stripprefix(const char *bigstring,
 369                                         const char *prefix)
 370 {
 371     const char *p1;
 372
 373     if (*prefix == '\0')
 374         return bigstring;
 375
 376     p1 = bigstring;
 377     while (*p1 && *prefix) {
 378         if (*p1++ != *prefix++)
 379             return bigstring;
 380     }
 381     if (*prefix == '\0')
 382         return p1;
 383
 384     /* hit the end of bigstring! */
 385     return bigstring;
 386 }
 387
 388 /*
 389  * Apache stub function for the regex libraries regexec() to make sure the
 390  * whole regex(3) API is available through the Apache (exported) namespace.
 391  * This is especially important for the DSO situations of modules.
 392  * DO NOT MAKE A MACRO OUT OF THIS FUNCTION!
 393  */
 394 AP_DECLARE(int) ap_regexec(regex_t *preg, const char *string,
 395                            size_t nmatch, regmatch_t pmatch[], int eflags)
 396 {
 397     return regexec(preg, string, nmatch, pmatch, eflags);
 398 }
 399
 400 AP_DECLARE(size_t) ap_regerror(int errcode, const regex_t *preg, char *errbuf,
 401                                size_t errbuf_size)
 402 {
 403     return regerror(errcode, preg, errbuf, errbuf_size);
 404 }
 405
 406
 407 /* This function substitutes for $0-$9, filling in regular expression
 408  * submatches. Pass it the same nmatch and pmatch arguments that you
 409  * passed ap_regexec(). pmatch should not be greater than the maximum number
 410  * of subexpressions - i.e. one more than the re_nsub member of regex_t.
 411  *
 412  * input should be the string with the $-expressions, source should be the
 413  * string that was matched against.
 414  *
 415  * It returns the substituted string, or NULL on error.
 416  *
 417  * Parts of this code are based on Henry Spencer's regsub(), from his
 418  * AT&T V8 regexp package.
 419  */
 420
 421 AP_DECLARE(char *) ap_pregsub(apr_pool_t *p, const char *input,
 422                               const char *source, size_t nmatch,
 423                               regmatch_t pmatch[])
 424 {
 425     const char *src = input;
 426     char *dest, *dst;
 427     char c;
 428     size_t no;
 429     int len;
 430
 431     if (!source)
 432         return NULL;
 433     if (!nmatch)
 434         return apr_pstrdup(p, src);
 435
 436     /* First pass, find the size */
 437
 438     len = 0;
 439
 440     while ((c = *src++) != '\0') {
 441         if (c == '&')
 442             no = 0;
 443         else if (c == '$' && apr_isdigit(*src))
 444             no = *src++ - '0';
 445         else
 446             no = 10;
 447
 448         if (no > 9) {                /* Ordinary character. */
 449             if (c == '\\' && (*src == '$' || *src == '&'))
 450                 c = *src++;
 451             len++;
 452         }
 453         else if (no < nmatch && pmatch[no].rm_so < pmatch[no].rm_eo) {
 454             len += pmatch[no].rm_eo - pmatch[no].rm_so;
 455         }
 456
 457     }
 458
 459     dest = dst = apr_pcalloc(p, len + 1);
 460
 461     /* Now actually fill in the string */
 462
 463     src = input;
 464
 465     while ((c = *src++) != '\0') {
 466         if (c == '&')
 467             no = 0;
 468         else if (c == '$' && apr_isdigit(*src))
 469             no = *src++ - '0';
 470         else
 471             no = 10;
 472
 473         if (no > 9) {                /* Ordinary character. */
 474             if (c == '\\' && (*src == '$' || *src == '&'))
 475                 c = *src++;
 476             *dst++ = c;
 477         }
 478         else if (no < nmatch && pmatch[no].rm_so < pmatch[no].rm_eo) {
 479             len = pmatch[no].rm_eo - pmatch[no].rm_so;
 480             memcpy(dst, source + pmatch[no].rm_so, len);
 481             dst += len;
 482         }
 483
 484     }
 485     *dst = '\0';
 486
 487     return dest;
 488 }
 489
 490 /*
 491  * Parse .. so we don't compromise security
 492  */
 493 AP_DECLARE(void) ap_getparents(char *name)
 494 {
 495     char *next;
 496     int l, w, first_dot;
 497
 498     /* Four paseses, as per RFC 1808 */
 499     /* a) remove ./ path segments */
 500     for (next = name; *next && (*next != '.'); next++) {
 501     }
 502
 503     l = w = first_dot = next - name;
 504     while (name[l] != '\0') {
 505         if (name[l] == '.' && IS_SLASH(name[l + 1])
 506             && (l == 0 || IS_SLASH(name[l - 1])))
 507             l += 2;
 508         else
 509             name[w++] = name[l++];
 510     }
 511
 512     /* b) remove trailing . path, segment */
 513     if (w == 1 && name[0] == '.')
 514         w--;
 515     else if (w > 1 && name[w - 1] == '.' && IS_SLASH(name[w - 2]))
 516         w--;
 517     name[w] = '\0';
 518
 519     /* c) remove all xx/../ segments. (including leading ../ and /../) */
 520     l = first_dot;
 521
 522     while (name[l] != '\0') {
 523         if (name[l] == '.' && name[l + 1] == '.' && IS_SLASH(name[l + 2])
 524             && (l == 0 || IS_SLASH(name[l - 1]))) {
 525             register int m = l + 3, n;
 526
 527             l = l - 2;
 528             if (l >= 0) {
 529                 while (l >= 0 && !IS_SLASH(name[l]))
 530                     l--;
 531                 l++;
 532             }
 533             else
 534                 l = 0;
 535             n = l;
 536             while ((name[n] = name[m]))
 537                 (++n, ++m);
 538         }
 539         else
 540             ++l;
 541     }
 542
 543     /* d) remove trailing xx/.. segment. */
 544     if (l == 2 && name[0] == '.' && name[1] == '.')
 545         name[0] = '\0';
 546     else if (l > 2 && name[l - 1] == '.' && name[l - 2] == '.'
 547              && IS_SLASH(name[l - 3])) {
 548         l = l - 4;
 549         if (l >= 0) {
 550             while (l >= 0 && !IS_SLASH(name[l]))
 551                 l--;
 552             l++;
 553         }
 554         else
 555             l = 0;
 556         name[l] = '\0';
 557     }
 558 }
 559
 560 AP_DECLARE(void) ap_no2slash(char *name)
 561 {
 562     char *d, *s;
 563
 564     s = d = name;
 565
 566 #ifdef HAVE_UNC_PATHS
 567     /* Check for UNC names.  Leave leading two slashes. */
 568     if (s[0] == '/' && s[1] == '/')
 569         *d++ = *s++;
 570 #endif
 571
 572     while (*s) {
 573         if ((*d++ = *s) == '/') {
 574             do {
 575                 ++s;
 576             } while (*s == '/');
 577         }
 578         else {
 579             ++s;
 580         }
 581     }
 582     *d = '\0';
 583 }
 584
 585
 586 /*
 587  * copy at most n leading directories of s into d
 588  * d should be at least as large as s plus 1 extra byte
 589  * assumes n > 0
 590  * the return value is the ever useful pointer to the trailing \0 of d
 591  *
 592  * MODIFIED FOR HAVE_DRIVE_LETTERS and NETWARE environments,
 593  * so that if n == 0, "/" is returned in d with n == 1
 594  * and s == "e:/test.html", "e:/" is returned in d
 595  * *** See also directory_walk in modules/http/http_request.c
 596
 597  * examples:
 598  *    /a/b, 0  ==> /  (true for all platforms)
 599  *    /a/b, 1  ==> /
 600  *    /a/b, 2  ==> /a/
 601  *    /a/b, 3  ==> /a/b/
 602  *    /a/b, 4  ==> /a/b/
 603  *
 604  *    c:/a/b 0 ==> /
 605  *    c:/a/b 1 ==> c:/
 606  *    c:/a/b 2 ==> c:/a/
  607  *    c:/a/b 3 ==> c:/a/b/
  608  *    c:/a/b 4 ==> c:/a/b/
 609  */
 610 AP_DECLARE(char *) ap_make_dirstr_prefix(char *d, const char *s, int n)
 611 {
 612     if (n < 1) {
 613         *d = '/';
 614         *++d = '\0';
 615         return (d);
 616     }
 617
 618     for (;;) {
 619         if (*s == '\0' || (*s == '/' && (--n) == 0)) {
 620             *d = '/';
 621             break;
 622         }
 623         *d++ = *s++;
 624     }
 625     *++d = 0;
 626     return (d);
 627 }
 628
 629
 630 /*
 631  * return the parent directory name including trailing / of the file s
 632  */
 633 AP_DECLARE(char *) ap_make_dirstr_parent(apr_pool_t *p, const char *s)
 634 {
 635     const char *last_slash = ap_strrchr_c(s, '/');
 636     char *d;
 637     int l;
 638
 639     if (last_slash == NULL) {
 640         return apr_pstrdup(p, "");
 641     }
 642     l = (last_slash - s) + 1;
 643     d = apr_palloc(p, l + 1);
 644     memcpy(d, s, l);
 645     d[l] = 0;
 646     return (d);
 647 }
 648
 649
 650 AP_DECLARE(int) ap_count_dirs(const char *path)
 651 {
 652     register int x, n;
 653
 654     for (x = 0, n = 0; path[x]; x++)
 655         if (path[x] == '/')
 656             n++;
 657     return n;
 658 }
 659
 660 AP_DECLARE(char *) ap_getword_nc(apr_pool_t *atrans, char **line, char stop)
 661 {
 662     return ap_getword(atrans, (const char **) line, stop);
 663 }
 664
 665 AP_DECLARE(char *) ap_getword(apr_pool_t *atrans, const char **line, char stop)
 666 {
 667     const char *pos = *line;
 668     int len;
 669     char *res;
 670
 671     while ((*pos != stop) && *pos) {
 672         ++pos;
 673     }
 674
 675     len = pos - *line;
 676     res = (char *)apr_palloc(atrans, len + 1);
 677     memcpy(res, *line, len);
 678     res[len] = 0;
 679
 680     if (stop) {
 681         while (*pos == stop) {
 682             ++pos;
 683         }
 684     }
 685     *line = pos;
 686
 687     return res;
 688 }
 689
 690 AP_DECLARE(char *) ap_getword_white_nc(apr_pool_t *atrans, char **line)
 691 {
 692     return ap_getword_white(atrans, (const char **) line);
 693 }
 694
 695 AP_DECLARE(char *) ap_getword_white(apr_pool_t *atrans, const char **line)
 696 {
 697     const char *pos = *line;
 698     int len;
 699     char *res;
 700
 701     while (!apr_isspace(*pos) && *pos) {
 702         ++pos;
 703     }
 704
 705     len = pos - *line;
 706     res = (char *)apr_palloc(atrans, len + 1);
 707     memcpy(res, *line, len);
 708     res[len] = 0;
 709
 710     while (apr_isspace(*pos)) {
 711         ++pos;
 712     }
 713
 714     *line = pos;
 715
 716     return res;
 717 }
 718
 719 AP_DECLARE(char *) ap_getword_nulls_nc(apr_pool_t *atrans, char **line,
 720                                        char stop)
 721 {
 722     return ap_getword_nulls(atrans, (const char **) line, stop);
 723 }
 724
 725 AP_DECLARE(char *) ap_getword_nulls(apr_pool_t *atrans, const char **line,
 726                                     char stop)
 727 {
 728     const char *pos = ap_strchr_c(*line, stop);
 729     char *res;
 730
 731     if (!pos) {
 732         res = apr_pstrdup(atrans, *line);
 733         *line += strlen(*line);
 734         return res;
 735     }
 736
 737     res = apr_pstrndup(atrans, *line, pos - *line);
 738
 739     ++pos;
 740
 741     *line = pos;
 742
 743     return res;
 744 }
 745
 746 /* Get a word, (new) config-file style --- quoted strings and backslashes
 747  * all honored
 748  */
 749
 750 static char *substring_conf(apr_pool_t *p, const char *start, int len,
 751                             char quote)
 752 {
 753     char *result = apr_palloc(p, len + 2);
 754     char *resp = result;
 755     int i;
 756
 757     for (i = 0; i < len; ++i) {
 758         if (start[i] == '\\' && (start[i + 1] == '\\'
 759                                  || (quote && start[i + 1] == quote)))
 760             *resp++ = start[++i];
 761         else
 762             *resp++ = start[i];
 763     }
 764
 765     *resp++ = '\0';
 766 #if RESOLVE_ENV_PER_TOKEN
 767     return ap_resolve_env(p,result);
 768 #else
 769     return result;
 770 #endif
 771 }
 772
 773 AP_DECLARE(char *) ap_getword_conf_nc(apr_pool_t *p, char **line)
 774 {
 775     return ap_getword_conf(p, (const char **) line);
 776 }
 777
 778 AP_DECLARE(char *) ap_getword_conf(apr_pool_t *p, const char **line)
 779 {
 780     const char *str = *line, *strend;
 781     char *res;
 782     char quote;
 783
 784     while (*str && apr_isspace(*str))
 785         ++str;
 786
 787     if (!*str) {
 788         *line = str;
 789         return "";
 790     }
 791
 792     if ((quote = *str) == '"' || quote == '\'') {
 793         strend = str + 1;
 794         while (*strend && *strend != quote) {
 795             if (*strend == '\\' && strend[1] && strend[1] == quote)
 796                 strend += 2;
 797             else
 798                 ++strend;
 799         }
 800         res = substring_conf(p, str + 1, strend - str - 1, quote);
 801
 802         if (*strend == quote)
 803             ++strend;
 804     }
 805     else {
 806         strend = str;
 807         while (*strend && !apr_isspace(*strend))
 808             ++strend;
 809
 810         res = substring_conf(p, str, strend - str, 0);
 811     }
 812
 813     while (*strend && apr_isspace(*strend))
 814         ++strend;
 815     *line = strend;
 816     return res;
 817 }
 818
 819 /* Check a string for any ${ENV} environment variable
 820  * construct and replace each them by the value of
 821  * that environment variable, if it exists. If the
 822  * environment value does not exist, leave the ${ENV}
 823  * construct alone; it means something else.
 824  */
 825 AP_DECLARE(const char *) ap_resolve_env(apr_pool_t *p, const char * word)
 826 {
 827        char tmp[ MAX_STRING_LEN ];
 828        const char *s, *e;
 829        tmp[0] = '\0';
 830
 831        if (!(s=ap_strchr_c(word,'$')))
 832                return word;
 833
 834        do {
 835                /* XXX - relies on strncat() to add '\0'
 836                 */
 837                strncat(tmp,word,s - word);
 838                if ((s[1] == '{') && (e=ap_strchr_c(s,'}'))) {
 839                        const char *e2 = e;
 840                        word = e + 1;
 841                        e = getenv(s+2);
 842                        if (e) {
 843                            strcat(tmp,e);
 844                        } else {
 845                            strncat(tmp, s, e2-s);
 846                            strcat(tmp,"}");
 847                        }
 848                } else {
 849                        /* ignore invalid strings */
 850                        word = s+1;
 851                        strcat(tmp,"$");
 852                };
 853        } while ((s=ap_strchr_c(word,'$')));
 854        strcat(tmp,word);
 855
 856        return apr_pstrdup(p,tmp);
 857 }
 858 AP_DECLARE(int) ap_cfg_closefile(ap_configfile_t *cfp)
 859 {
 860 #ifdef DEBUG
 861     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL,
 862         "Done with config file %s", cfp->name);
 863 #endif
 864     return (cfp->close == NULL) ? 0 : cfp->close(cfp->param);
 865 }
 866
 867 static apr_status_t cfg_close(void *param)
 868 {
 869     apr_file_t *cfp = (apr_file_t *) param;
 870     return (apr_file_close(cfp));
 871 }
 872
 873 static int cfg_getch(void *param)
 874 {
 875     char ch;
 876     apr_file_t *cfp = (apr_file_t *) param;
 877     if (apr_file_getc(&ch, cfp) == APR_SUCCESS)
 878         return ch;
 879     return (int)EOF;
 880 }
 881
 882 static void *cfg_getstr(void *buf, size_t bufsiz, void *param)
 883 {
 884     apr_file_t *cfp = (apr_file_t *) param;
 885     apr_status_t rv;
 886     rv = apr_file_gets(buf, bufsiz, cfp);
 887     if (rv == APR_SUCCESS || (rv == APR_EOF && strcmp(buf, "")))
 888         return buf;
 889     return NULL;
 890 }
 891
 892 /* Open a ap_configfile_t as FILE, return open ap_configfile_t struct pointer */
 893 AP_DECLARE(apr_status_t) ap_pcfg_openfile(ap_configfile_t **ret_cfg,
 894                                           apr_pool_t *p, const char *name)
 895 {
 896     ap_configfile_t *new_cfg;
 897     apr_file_t *file = NULL;
 898     apr_finfo_t finfo;
 899     apr_status_t status;
 900 #ifdef DEBUG
 901     char buf[120];
 902 #endif
 903
 904     if (name == NULL) {
 905         ap_log_error(APLOG_MARK, APLOG_ERR, 0, NULL,
 906                "Internal error: pcfg_openfile() called with NULL filename");
 907         return APR_EBADF;
 908     }
 909
 910     status = apr_file_open(&file, name, APR_READ | APR_BUFFERED,
 911                            APR_OS_DEFAULT, p);
 912 #ifdef DEBUG
 913     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL,
 914                 "Opening config file %s (%s)",
 915                 name, (status != APR_SUCCESS) ?
 916                 apr_strerror(status, buf, sizeof(buf)) : "successful");
 917 #endif
 918     if (status != APR_SUCCESS)
 919         return status;
 920
 921     status = apr_file_info_get(&finfo, APR_FINFO_TYPE, file);
 922     if (status != APR_SUCCESS)
 923         return status;
 924
 925     if (finfo.filetype != APR_REG &&
 926 #if defined(WIN32) || defined(OS2) || defined(NETWARE)
 927         strcasecmp(apr_filename_of_pathname(name), "nul") != 0) {
 928 #else
 929         strcmp(name, "/dev/null") != 0) {
 930 #endif /* WIN32 || OS2 */
 931         ap_log_error(APLOG_MARK, APLOG_ERR, 0, NULL,
 932                      "Access to file %s denied by server: not a regular file",
 933                      name);
 934         apr_file_close(file);
 935         return APR_EBADF;
 936     }
 937
 938 #ifdef WIN32
 939     /* Some twisted character [no pun intended] at MS decided that a
 940      * zero width joiner as the lead wide character would be ideal for
 941      * describing Unicode text files.  This was further convoluted to
 942      * another MSism that the same character mapped into utf-8, EF BB BF
 943      * would signify utf-8 text files.
 944      *
 945      * Since MS configuration files are all protecting utf-8 encoded
 946      * Unicode path, file and resource names, we already have the correct
 947      * WinNT encoding.  But at least eat the stupid three bytes up front.
 948      */
 949     {
 950         unsigned char buf[4];
 951         apr_size_t len = 3;
 952         status = apr_file_read(file, buf, &len);
 953         if ((status != APR_SUCCESS) || (len < 3)
 954               || memcmp(buf, "\xEF\xBB\xBF", 3) != 0) {
 955             apr_off_t zero = 0;
 956             apr_file_seek(file, APR_SET, &zero);
 957         }
 958     }
 959 #endif
 960
 961     new_cfg = apr_palloc(p, sizeof(*new_cfg));
 962     new_cfg->param = file;
 963     new_cfg->name = apr_pstrdup(p, name);
 964     new_cfg->getch = (int (*)(void *)) cfg_getch;
 965     new_cfg->getstr = (void *(*)(void *, size_t, void *)) cfg_getstr;
 966     new_cfg->close = (int (*)(void *)) cfg_close;
 967     new_cfg->line_number = 0;
 968     *ret_cfg = new_cfg;
 969     return APR_SUCCESS;
 970 }
 971
 972
 973 /* Allocate a ap_configfile_t handle with user defined functions and params */
 974 AP_DECLARE(ap_configfile_t *) ap_pcfg_open_custom(apr_pool_t *p,
 975                        const char *descr,
 976                        void *param,
 977                        int(*getch)(void *param),
 978                        void *(*getstr) (void *buf, size_t bufsiz, void *param),
 979                        int(*close_func)(void *param))
 980 {
 981     ap_configfile_t *new_cfg = apr_palloc(p, sizeof(*new_cfg));
 982 #ifdef DEBUG
 983     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL,
 984                  "Opening config handler %s", descr);
 985 #endif
 986     new_cfg->param = param;
 987     new_cfg->name = descr;
 988     new_cfg->getch = getch;
 989     new_cfg->getstr = getstr;
 990     new_cfg->close = close_func;
 991     new_cfg->line_number = 0;
 992     return new_cfg;
 993 }
 994
 995 /* Read one character from a configfile_t */
 996 AP_DECLARE(int) ap_cfg_getc(ap_configfile_t *cfp)
 997 {
 998     register int ch = cfp->getch(cfp->param);
 999     if (ch == LF)
1000         ++cfp->line_number;
1001     return ch;
1002 }
1003
1004 /* Read one line from open ap_configfile_t, strip LF, increase line number */
1005 /* If custom handler does not define a getstr() function, read char by char */
1006 AP_DECLARE(int) ap_cfg_getline(char *buf, size_t bufsize, ap_configfile_t *cfp)
1007 {
1008     /* If a "get string" function is defined, use it */
1009     if (cfp->getstr != NULL) {
1010         char *src, *dst;
1011         char *cp;
1012         char *cbuf = buf;
1013         size_t cbufsize = bufsize;
1014
1015         while (1) {
1016             ++cfp->line_number;
1017             if (cfp->getstr(cbuf, cbufsize, cfp->param) == NULL)
1018                 return 1;
1019
1020             /*
1021              *  check for line continuation,
1022              *  i.e. match [^\\]\\[\r]\n only
1023              */
1024             cp = cbuf;
1025             while (cp < cbuf+cbufsize && *cp != '\0')
1026                 cp++;
1027             if (cp > cbuf && cp[-1] == LF) {
1028                 cp--;
1029                 if (cp > cbuf && cp[-1] == CR)
1030                     cp--;
1031                 if (cp > cbuf && cp[-1] == '\\') {
1032                     cp--;
1033                     if (!(cp > cbuf && cp[-1] == '\\')) {
1034                         /*
1035                          * line continuation requested -
1036                          * then remove backslash and continue
1037                          */
1038                         cbufsize -= (cp-cbuf);
1039                         cbuf = cp;
1040                         continue;
1041                     }
1042                     else {
1043                         /*
1044                          * no real continuation because escaped -
1045                          * then just remove escape character
1046                          */
1047                         for ( ; cp < cbuf+cbufsize && *cp != '\0'; cp++)
1048                             cp[0] = cp[1];
1049                     }
1050                 }
1051             }
1052             break;
1053         }
1054
1055         /*
1056          * Leading and trailing white space is eliminated completely
1057          */
1058         src = buf;
1059         while (apr_isspace(*src))
1060             ++src;
1061         /* blast trailing whitespace */
1062         dst = &src[strlen(src)];
1063         while (--dst >= src && apr_isspace(*dst))
1064             *dst = '\0';
1065         /* Zap leading whitespace by shifting */
1066         if (src != buf)
1067             for (dst = buf; (*dst++ = *src++) != '\0'; )
1068                 ;
1069
1070 #ifdef DEBUG_CFG_LINES
1071         ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL, "Read config: %s", buf);
1072 #endif
1073         return 0;
1074     } else {
1075         /* No "get string" function defined; read character by character */
1076         register int c;
1077         register size_t i = 0;
1078
1079         buf[0] = '\0';
1080         /* skip leading whitespace */
1081         do {
1082             c = cfp->getch(cfp->param);
1083         } while (c == '\t' || c == ' ');
1084
1085         if (c == EOF)
1086             return 1;
1087
1088         if(bufsize < 2) {
1089             /* too small, assume caller is crazy */
1090             return 1;
1091         }
1092
1093         while (1) {
1094             if ((c == '\t') || (c == ' ')) {
1095                 buf[i++] = ' ';
1096                 while ((c == '\t') || (c == ' '))
1097                     c = cfp->getch(cfp->param);
1098             }
1099             if (c == CR) {
1100                 /* silently ignore CR (_assume_ that a LF follows) */
1101                 c = cfp->getch(cfp->param);
1102             }
1103             if (c == LF) {
1104                 /* increase line number and return on LF */
1105                 ++cfp->line_number;
1106             }
1107             if (c == EOF || c == 0x4 || c == LF || i >= (bufsize - 2)) {
1108                 /*
1109                  *  check for line continuation
1110                  */
1111                 if (i > 0 && buf[i-1] == '\\') {
1112                     i--;
1113                     if (!(i > 0 && buf[i-1] == '\\')) {
1114                         /* line is continued */
1115                         c = cfp->getch(cfp->param);
1116                         continue;
1117                     }
1118                     /* else nothing needs be done because
1119                      * then the backslash is escaped and
1120                      * we just strip to a single one
1121                      */
1122                 }
1123                 /* blast trailing whitespace */
1124                 while (i > 0 && apr_isspace(buf[i - 1]))
1125                     --i;
1126                 buf[i] = '\0';
1127 #ifdef DEBUG_CFG_LINES
1128                 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL,
1129                              "Read config: %s", buf);
1130 #endif
1131                 return 0;
1132             }
1133             buf[i] = c;
1134             ++i;
1135             c = cfp->getch(cfp->param);
1136         }
1137     }
1138 }
1139
1140 /* Size an HTTP header field list item, as separated by a comma.
1141  * The return value is a pointer to the beginning of the non-empty list item
1142  * within the original string (or NULL if there is none) and the address
1143  * of field is shifted to the next non-comma, non-whitespace character.
1144  * len is the length of the item excluding any beginning whitespace.
1145  */
1146 AP_DECLARE(const char *) ap_size_list_item(const char **field, int *len)
1147 {
1148     const unsigned char *ptr = (const unsigned char *)*field;
1149     const unsigned char *token;
1150     int in_qpair, in_qstr, in_com;
1151
1152     /* Find first non-comma, non-whitespace byte */
1153
1154     while (*ptr == ',' || apr_isspace(*ptr))
1155         ++ptr;
1156
1157     token = ptr;
1158
1159     /* Find the end of this item, skipping over dead bits */
1160
1161     for (in_qpair = in_qstr = in_com = 0;
1162          *ptr && (in_qpair || in_qstr || in_com || *ptr != ',');
1163          ++ptr) {
1164
1165         if (in_qpair) {
1166             in_qpair = 0;
1167         }
1168         else {
1169             switch (*ptr) {
1170                 case '\\': in_qpair = 1;      /* quoted-pair         */
1171                            break;
1172                 case '"' : if (!in_com)       /* quoted string delim */
1173                                in_qstr = !in_qstr;
1174                            break;
1175                 case '(' : if (!in_qstr)      /* comment (may nest)  */
1176                                ++in_com;
1177                            break;
1178                 case ')' : if (in_com)        /* end comment         */
1179                                --in_com;
1180                            break;
1181                 default  : break;
1182             }
1183         }
1184     }
1185
1186     if ((*len = (ptr - token)) == 0) {
1187         *field = (const char *)ptr;
1188         return NULL;
1189     }
1190
1191     /* Advance field pointer to the next non-comma, non-white byte */
1192
1193     while (*ptr == ',' || apr_isspace(*ptr))
1194         ++ptr;
1195
1196     *field = (const char *)ptr;
1197     return (const char *)token;
1198 }
1199
1200 /* Retrieve an HTTP header field list item, as separated by a comma,
1201  * while stripping insignificant whitespace and lowercasing anything not in
1202  * a quoted string or comment.  The return value is a new string containing
1203  * the converted list item (or NULL if none) and the address pointed to by
1204  * field is shifted to the next non-comma, non-whitespace.
1205  */
1206 AP_DECLARE(char *) ap_get_list_item(apr_pool_t *p, const char **field)
1207 {
1208     const char *tok_start;
1209     const unsigned char *ptr;
1210     unsigned char *pos;
1211     char *token;
1212     int addspace = 0, in_qpair = 0, in_qstr = 0, in_com = 0, tok_len = 0;
1213
1214     /* Find the beginning and maximum length of the list item so that
1215      * we can allocate a buffer for the new string and reset the field.
1216      */
1217     if ((tok_start = ap_size_list_item(field, &tok_len)) == NULL) {
1218         return NULL;
1219     }
1220     token = apr_palloc(p, tok_len + 1);
1221
1222     /* Scan the token again, but this time copy only the good bytes.
1223      * We skip extra whitespace and any whitespace around a '=', '/',
1224      * or ';' and lowercase normal characters not within a comment,
1225      * quoted-string or quoted-pair.
1226      */
1227     for (ptr = (const unsigned char *)tok_start, pos = (unsigned char *)token;
1228          *ptr && (in_qpair || in_qstr || in_com || *ptr != ',');
1229          ++ptr) {
1230
1231         if (in_qpair) {
1232             in_qpair = 0;
1233             *pos++ = *ptr;
1234         }
1235         else {
1236             switch (*ptr) {
1237                 case '\\': in_qpair = 1;
1238                            if (addspace == 1)
1239                                *pos++ = ' ';
1240                            *pos++ = *ptr;
1241                            addspace = 0;
1242                            break;
1243                 case '"' : if (!in_com)
1244                                in_qstr = !in_qstr;
1245                            if (addspace == 1)
1246                                *pos++ = ' ';
1247                            *pos++ = *ptr;
1248                            addspace = 0;
1249                            break;
1250                 case '(' : if (!in_qstr)
1251                                ++in_com;
1252                            if (addspace == 1)
1253                                *pos++ = ' ';
1254                            *pos++ = *ptr;
1255                            addspace = 0;
1256                            break;
1257                 case ')' : if (in_com)
1258                                --in_com;
1259                            *pos++ = *ptr;
1260                            addspace = 0;
1261                            break;
1262                 case ' ' :
1263                 case '\t': if (addspace)
1264                                break;
1265                            if (in_com || in_qstr)
1266                                *pos++ = *ptr;
1267                            else
1268                                addspace = 1;
1269                            break;
1270                 case '=' :
1271                 case '/' :
1272                 case ';' : if (!(in_com || in_qstr))
1273                                addspace = -1;
1274                            *pos++ = *ptr;
1275                            break;
1276                 default  : if (addspace == 1)
1277                                *pos++ = ' ';
1278                            *pos++ = (in_com || in_qstr) ? *ptr
1279                                                         : apr_tolower(*ptr);
1280                            addspace = 0;
1281                            break;
1282             }
1283         }
1284     }
1285     *pos = '\0';
1286
1287     return token;
1288 }
1289
1290 /* Find an item in canonical form (lowercase, no extra spaces) within
1291  * an HTTP field value list.  Returns 1 if found, 0 if not found.
1292  * This would be much more efficient if we stored header fields as
1293  * an array of list items as they are received instead of a plain string.
1294  */
1295 AP_DECLARE(int) ap_find_list_item(apr_pool_t *p, const char *line,
1296                                   const char *tok)
1297 {
1298     const unsigned char *pos;
1299     const unsigned char *ptr = (const unsigned char *)line;
1300     int good = 0, addspace = 0, in_qpair = 0, in_qstr = 0, in_com = 0;
1301
1302     if (!line || !tok)
1303         return 0;
1304
1305     do {  /* loop for each item in line's list */
1306
1307         /* Find first non-comma, non-whitespace byte */
1308
1309         while (*ptr == ',' || apr_isspace(*ptr))
1310             ++ptr;
1311
1312         if (*ptr)
1313             good = 1;  /* until proven otherwise for this item */
1314         else
1315             break;     /* no items left and nothing good found */
1316
1317         /* We skip extra whitespace and any whitespace around a '=', '/',
1318          * or ';' and lowercase normal characters not within a comment,
1319          * quoted-string or quoted-pair.
1320          */
1321         for (pos = (const unsigned char *)tok;
1322              *ptr && (in_qpair || in_qstr || in_com || *ptr != ',');
1323              ++ptr) {
1324
1325             if (in_qpair) {
1326                 in_qpair = 0;
1327                 if (good)
1328                     good = (*pos++ == *ptr);
1329             }
1330             else {
1331                 switch (*ptr) {
1332                     case '\\': in_qpair = 1;
1333                                if (addspace == 1)
1334                                    good = good && (*pos++ == ' ');
1335                                good = good && (*pos++ == *ptr);
1336                                addspace = 0;
1337                                break;
1338                     case '"' : if (!in_com)
1339                                    in_qstr = !in_qstr;
1340                                if (addspace == 1)
1341                                    good = good && (*pos++ == ' ');
1342                                good = good && (*pos++ == *ptr);
1343                                addspace = 0;
1344                                break;
1345                     case '(' : if (!in_qstr)
1346                                    ++in_com;
1347                                if (addspace == 1)
1348                                    good = good && (*pos++ == ' ');
1349                                good = good && (*pos++ == *ptr);
1350                                addspace = 0;
1351                                break;
1352                     case ')' : if (in_com)
1353                                    --in_com;
1354                                good = good && (*pos++ == *ptr);
1355                                addspace = 0;
1356                                break;
1357                     case ' ' :
1358                     case '\t': if (addspace || !good)
1359                                    break;
1360                                if (in_com || in_qstr)
1361                                    good = (*pos++ == *ptr);
1362                                else
1363                                    addspace = 1;
1364                                break;
1365                     case '=' :
1366                     case '/' :
1367                     case ';' : if (!(in_com || in_qstr))
1368                                    addspace = -1;
1369                                good = good && (*pos++ == *ptr);
1370                                break;
1371                     default  : if (!good)
1372                                    break;
1373                                if (addspace == 1)
1374                                    good = (*pos++ == ' ');
1375                                if (in_com || in_qstr)
1376                                    good = good && (*pos++ == *ptr);
1377                                else
1378                                    good = good && (*pos++ == apr_tolower(*ptr));
1379                                addspace = 0;
1380                                break;
1381                 }
1382             }
1383         }
1384         if (good && *pos)
1385             good = 0;          /* not good if only a prefix was matched */
1386
1387     } while (*ptr && !good);
1388
1389     return good;
1390 }
1391
1392
1393 /* Retrieve a token, spacing over it and returning a pointer to
1394  * the first non-white byte afterwards.  Note that these tokens
1395  * are delimited by semis and commas; and can also be delimited
1396  * by whitespace at the caller's option.
1397  */
1398
1399 AP_DECLARE(char *) ap_get_token(apr_pool_t *p, const char **accept_line,
1400                                 int accept_white)
1401 {
1402     const char *ptr = *accept_line;
1403     const char *tok_start;
1404     char *token;
1405     int tok_len;
1406
1407     /* Find first non-white byte */
1408
1409     while (*ptr && apr_isspace(*ptr))
1410         ++ptr;
1411
1412     tok_start = ptr;
1413
1414     /* find token end, skipping over quoted strings.
1415      * (comments are already gone).
1416      */
1417
1418     while (*ptr && (accept_white || !apr_isspace(*ptr))
1419            && *ptr != ';' && *ptr != ',') {
1420         if (*ptr++ == '"')
1421             while (*ptr)
1422                 if (*ptr++ == '"')
1423                     break;
1424     }
1425
1426     tok_len = ptr - tok_start;
1427     token = apr_pstrndup(p, tok_start, tok_len);
1428
1429     /* Advance accept_line pointer to the next non-white byte */
1430
1431     while (*ptr && apr_isspace(*ptr))
1432         ++ptr;
1433
1434     *accept_line = ptr;
1435     return token;
1436 }
1437
1438
1439 /* find http tokens, see the definition of token from RFC2068 */
1440 AP_DECLARE(int) ap_find_token(apr_pool_t *p, const char *line, const char *tok)
1441 {
1442     const unsigned char *start_token;
1443     const unsigned char *s;
1444
1445     if (!line)
1446         return 0;
1447
1448     s = (const unsigned char *)line;
1449     for (;;) {
1450         /* find start of token, skip all stop characters, note NUL
1451          * isn't a token stop, so we don't need to test for it
1452          */
1453         while (TEST_CHAR(*s, T_HTTP_TOKEN_STOP)) {
1454             ++s;
1455         }
1456         if (!*s) {
1457             return 0;
1458         }
1459         start_token = s;
1460         /* find end of the token */
1461         while (*s && !TEST_CHAR(*s, T_HTTP_TOKEN_STOP)) {
1462             ++s;
1463         }
1464         if (!strncasecmp((const char *)start_token, (const char *)tok,
1465                          s - start_token)) {
1466             return 1;
1467         }
1468         if (!*s) {
1469             return 0;
1470         }
1471     }
1472 }
1473
1474
1475 AP_DECLARE(int) ap_find_last_token(apr_pool_t *p, const char *line,
1476                                    const char *tok)
1477 {
1478     int llen, tlen, lidx;
1479
1480     if (!line)
1481         return 0;
1482
1483     llen = strlen(line);
1484     tlen = strlen(tok);
1485     lidx = llen - tlen;
1486
1487     if (lidx < 0 ||
1488         (lidx > 0 && !(apr_isspace(line[lidx - 1]) || line[lidx - 1] == ',')))
1489         return 0;
1490
1491     return (strncasecmp(&line[lidx], tok, tlen) == 0);
1492 }
1493
1494 AP_DECLARE(char *) ap_escape_shell_cmd(apr_pool_t *p, const char *str)
1495 {
1496     char *cmd;
1497     unsigned char *d;
1498     const unsigned char *s;
1499
1500     cmd = apr_palloc(p, 2 * strlen(str) + 1);        /* Be safe */
1501     d = (unsigned char *)cmd;
1502     s = (const unsigned char *)str;
1503     for (; *s; ++s) {
1504
1505 #if defined(OS2) || defined(WIN32)
1506         /*
1507          * Newlines to Win32/OS2 CreateProcess() are ill advised.
1508          * Convert them to spaces since they are effectively white
1509          * space to most applications
1510          */
1511         if (*s == '\r' || *s == '\n') {
1512              *d++ = ' ';
1513              continue;
1514          }
1515 #endif
1516
1517         if (TEST_CHAR(*s, T_ESCAPE_SHELL_CMD)) {
1518             *d++ = '\\';
1519         }
1520         *d++ = *s;
1521     }
1522     *d = '\0';
1523
1524     return cmd;
1525 }
1526
/* Convert the two hexadecimal digits at what[0] and what[1] into the
 * character they encode.  Both digits are taken to be valid hex digits;
 * upper and lower case letters are accepted.
 */
static char x2c(const char *what)
{
#if !APR_CHARSET_EBCDIC
    char value = 0;
    int i;

    /* fold in the high nibble first, then the low one */
    for (i = 0; i < 2; i++) {
        value *= 16;
        if (what[i] >= 'A') {
            /* clearing bit 0x20 maps 'a'..'f' onto 'A'..'F' */
            value += ((what[i] & 0xdf) - 'A') + 10;
        }
        else {
            value += (what[i] - '0');
        }
    }
    return (value);
#else /*APR_CHARSET_EBCDIC*/
    char value;
    char hex[5];

    /* build "0xNN", parse it, then translate the byte from ASCII */
    hex[0] = '0';
    hex[1] = 'x';
    hex[2] = what[0];
    hex[3] = what[1];
    hex[4] = '\0';
    value = apr_xlate_conv_byte(ap_hdrs_from_ascii,
                                0xFF & strtol(hex, NULL, 16));
    return (value);
#endif /*APR_CHARSET_EBCDIC*/
}
1549
1550 /*
1551  * Unescapes a URL.
1552  * Returns 0 on success, non-zero on error
1553  * Failure is due to
1554  *   bad % escape       returns HTTP_BAD_REQUEST
1555  *
1556  *   decoding %00 -> \0  (the null character)
1557  *   decoding %2f -> /   (a special character)
1558  *                      returns HTTP_NOT_FOUND
1559  */
1560 AP_DECLARE(int) ap_unescape_url(char *url)
1561 {
1562     register int badesc, badpath;
1563     char *x, *y;
1564
1565     badesc = 0;
1566     badpath = 0;
1567     /* Initial scan for first '%'. Don't bother writing values before
1568      * seeing a '%' */
1569     y = strchr(url, '%');
1570     if (y == NULL) {
1571         return OK;
1572     }
1573     for (x = y; *y; ++x, ++y) {
1574         if (*y != '%')
1575             *x = *y;
1576         else {
1577             if (!apr_isxdigit(*(y + 1)) || !apr_isxdigit(*(y + 2))) {
1578                 badesc = 1;
1579                 *x = '%';
1580             }
1581             else {
1582                 *x = x2c(y + 1);
1583                 y += 2;
1584                 if (IS_SLASH(*x) || *x == '\0')
1585                     badpath = 1;
1586             }
1587         }
1588     }
1589     *x = '\0';
1590     if (badesc)
1591         return HTTP_BAD_REQUEST;
1592     else if (badpath)
1593         return HTTP_NOT_FOUND;
1594     else
1595         return OK;
1596 }
1597
1598 AP_DECLARE(char *) ap_construct_server(apr_pool_t *p, const char *hostname,
1599                                        apr_port_t port, const request_rec *r)
1600 {
1601     if (ap_is_default_port(port, r)) {
1602         return apr_pstrdup(p, hostname);
1603     }
1604     else {
1605         return apr_psprintf(p, "%s:%u", hostname, port);
1606     }
1607 }
1608
1609 /* c2x takes an unsigned, and expects the caller has guaranteed that
1610  * 0 <= what < 256... which usually means that you have to cast to
1611  * unsigned char first, because (unsigned)(char)(x) first goes through
1612  * signed extension to an int before the unsigned cast.
1613  *
1614  * The reason for this assumption is to assist gcc code generation --
1615  * the unsigned char -> unsigned extension is already done earlier in
1616  * both uses of this code, so there's no need to waste time doing it
1617  * again.
1618  */
1619 static const char c2x_table[] = "0123456789abcdef";
1620
1621 static APR_INLINE unsigned char *c2x(unsigned what, unsigned char *where)
1622 {
1623 #if APR_CHARSET_EBCDIC
1624     what = apr_xlate_conv_byte(ap_hdrs_to_ascii, (unsigned char)what);
1625 #endif /*APR_CHARSET_EBCDIC*/
1626     *where++ = '%';
1627     *where++ = c2x_table[what >> 4];
1628     *where++ = c2x_table[what & 0xf];
1629     return where;
1630 }
1631
1632 /*
1633  * escape_path_segment() escapes a path segment, as defined in RFC 1808. This
1634  * routine is (should be) OS independent.
1635  *
1636  * os_escape_path() converts an OS path to a URL, in an OS dependent way. In all
1637  * cases if a ':' occurs before the first '/' in the URL, the URL should be
1638  * prefixed with "./" (or the ':' escaped). In the case of Unix, this means
1639  * leaving '/' alone, but otherwise doing what escape_path_segment() does. For
1640  * efficiency reasons, we don't use escape_path_segment(), which is provided for
1641  * reference. Again, RFC 1808 is where this stuff is defined.
1642  *
1643  * If partial is set, os_escape_path() assumes that the path will be appended to
1644  * something with a '/' in it (and thus does not prefix "./").
1645  */
1646
1647 AP_DECLARE(char *) ap_escape_path_segment(apr_pool_t *p, const char *segment)
1648 {
1649     char *copy = apr_palloc(p, 3 * strlen(segment) + 1);
1650     const unsigned char *s = (const unsigned char *)segment;
1651     unsigned char *d = (unsigned char *)copy;
1652     unsigned c;
1653
1654     while ((c = *s)) {
1655         if (TEST_CHAR(c, T_ESCAPE_PATH_SEGMENT)) {
1656             d = c2x(c, d);
1657         }
1658         else {
1659             *d++ = c;
1660         }
1661         ++s;
1662     }
1663     *d = '\0';
1664     return copy;
1665 }
1666
1667 AP_DECLARE(char *) ap_os_escape_path(apr_pool_t *p, const char *path, int partial)
1668 {
1669     char *copy = apr_palloc(p, 3 * strlen(path) + 3);
1670     const unsigned char *s = (const unsigned char *)path;
1671     unsigned char *d = (unsigned char *)copy;
1672     unsigned c;
1673
1674     if (!partial) {
1675         const char *colon = ap_strchr_c(path, ':');
1676         const char *slash = ap_strchr_c(path, '/');
1677
1678         if (colon && (!slash || colon < slash)) {
1679             *d++ = '.';
1680             *d++ = '/';
1681         }
1682     }
1683     while ((c = *s)) {
1684         if (TEST_CHAR(c, T_OS_ESCAPE_PATH)) {
1685             d = c2x(c, d);
1686         }
1687         else {
1688             *d++ = c;
1689         }
1690         ++s;
1691     }
1692     *d = '\0';
1693     return copy;
1694 }
1695
1696 /* ap_escape_uri is now a macro for os_escape_path */
1697
1698 AP_DECLARE(char *) ap_escape_html(apr_pool_t *p, const char *s)
1699 {
1700     int i, j;
1701     char *x;
1702
1703     /* first, count the number of extra characters */
1704     for (i = 0, j = 0; s[i] != '\0'; i++)
1705         if (s[i] == '<' || s[i] == '>')
1706             j += 3;
1707         else if (s[i] == '&')
1708             j += 4;
1709
1710     if (j == 0)
1711         return apr_pstrmemdup(p, s, i);
1712
1713     x = apr_palloc(p, i + j + 1);
1714     for (i = 0, j = 0; s[i] != '\0'; i++, j++)
1715         if (s[i] == '<') {
1716             memcpy(&x[j], "&lt;", 4);
1717             j += 3;
1718         }
1719         else if (s[i] == '>') {
1720             memcpy(&x[j], "&gt;", 4);
1721             j += 3;
1722         }
1723         else if (s[i] == '&') {
1724             memcpy(&x[j], "&amp;", 5);
1725             j += 4;
1726         }
1727         else
1728             x[j] = s[i];
1729
1730     x[j] = '\0';
1731     return x;
1732 }
1733
1734 AP_DECLARE(int) ap_is_directory(apr_pool_t *p, const char *path)
1735 {
1736     apr_finfo_t finfo;
1737
1738     if (apr_stat(&finfo, path, APR_FINFO_TYPE, p) != APR_SUCCESS)
1739         return 0;                /* in error condition, just return no */
1740
1741     return (finfo.filetype == APR_DIR);
1742 }
1743
1744 AP_DECLARE(int) ap_is_rdirectory(apr_pool_t *p, const char *path)
1745 {
1746     apr_finfo_t finfo;
1747
1748     if (apr_lstat(&finfo, path, APR_FINFO_TYPE, p) != APR_SUCCESS)
1749         return 0;                /* in error condition, just return no */
1750
1751     return (finfo.filetype == APR_DIR);
1752 }
1753
1754 AP_DECLARE(char *) ap_make_full_path(apr_pool_t *a, const char *src1,
1755                                   const char *src2)
1756 {
1757     apr_size_t len1, len2;
1758     char *path;
1759
1760     len1 = strlen(src1);
1761     len2 = strlen(src2);
1762      /* allocate +3 for '/' delimiter, trailing NULL and overallocate
1763       * one extra byte to allow the caller to add a trailing '/'
1764       */
1765     path = (char *)apr_palloc(a, len1 + len2 + 3);
1766     if (len1 == 0) {
1767         *path = '/';
1768         memcpy(path + 1, src2, len2 + 1);
1769     }
1770     else {
1771         char *next;
1772         memcpy(path, src1, len1);
1773         next = path + len1;
1774         if (next[-1] != '/') {
1775             *next++ = '/';
1776         }
1777         memcpy(next, src2, len2 + 1);
1778     }
1779     return path;
1780 }
1781
1782 /*
1783  * Check for an absoluteURI syntax (see section 3.2 in RFC2068).
1784  */
1785 AP_DECLARE(int) ap_is_url(const char *u)
1786 {
1787     register int x;
1788
1789     for (x = 0; u[x] != ':'; x++) {
1790         if ((!u[x]) ||
1791             ((!apr_isalpha(u[x])) && (!apr_isdigit(u[x])) &&
1792              (u[x] != '+') && (u[x] != '-') && (u[x] != '.'))) {
1793             return 0;
1794         }
1795     }
1796
1797     return (x ? 1 : 0);                /* If the first character is ':', it's broken, too */
1798 }
1799
1800 AP_DECLARE(int) ap_ind(const char *s, char c)
1801 {
1802     const char *p = ap_strchr_c(s, c);
1803
1804     if (p == NULL)
1805         return -1;
1806     return p - s;
1807 }
1808
1809 AP_DECLARE(int) ap_rind(const char *s, char c)
1810 {
1811     const char *p = ap_strrchr_c(s, c);
1812
1813     if (p == NULL)
1814         return -1;
1815     return p - s;
1816 }
1817
1818 AP_DECLARE(void) ap_str_tolower(char *str)
1819 {
1820     while (*str) {
1821         *str = apr_tolower(*str);
1822         ++str;
1823     }
1824 }
1825
1826 static char *find_fqdn(apr_pool_t *a, struct hostent *p)
1827 {
1828     int x;
1829
1830     if (!strchr(p->h_name, '.')) {
1831         if (p->h_aliases) {
1832             for (x = 0; p->h_aliases[x]; ++x) {
1833                 if (strchr(p->h_aliases[x], '.') &&
1834                     (!strncasecmp(p->h_aliases[x], p->h_name,
1835                                   strlen(p->h_name))))
1836                     return apr_pstrdup(a, p->h_aliases[x]);
1837             }
1838         }
1839         return NULL;
1840     }
1841     return apr_pstrdup(a, (void *) p->h_name);
1842 }
1843
1844 char *ap_get_local_host(apr_pool_t *a)
1845 {
1846 #ifndef MAXHOSTNAMELEN
1847 #define MAXHOSTNAMELEN 256
1848 #endif
1849     char str[MAXHOSTNAMELEN + 1];
1850     char *server_hostname = NULL;
1851     struct hostent *p;
1852
1853 #ifdef BEOS_R5
1854     if (gethostname(str, sizeof(str) - 1) == 0)
1855 #else
1856     if (gethostname(str, sizeof(str) - 1) != 0)
1857 #endif
1858     {
1859         ap_log_perror(APLOG_MARK, APLOG_STARTUP | APLOG_WARNING, 0, a,
1860                      "%s: gethostname() failed to determine ServerName",
1861                      ap_server_argv0);
1862     }
1863     else
1864     {
1865         str[sizeof(str) - 1] = '\0';
1866         /* TODO: Screaming for APR-ization */
1867         if ((!(p = gethostbyname(str)))
1868             || (!(server_hostname = find_fqdn(a, p)))) {
1869             /* Recovery - return the default servername by IP: */
1870             if (p && p->h_addr_list[0]) {
1871                 apr_snprintf(str, sizeof(str), "%pA", p->h_addr_list[0]);
1872                 server_hostname = apr_pstrdup(a, str);
1873                 /* We will drop through to report the IP-named server */
1874             }
1875         }
1876         else {
1877             /* Since we found a fdqn, return it with no logged message. */
1878             return server_hostname;
1879         }
1880     }
1881
1882     if (!server_hostname)
1883         server_hostname = apr_pstrdup(a, "127.0.0.1");
1884
1885     ap_log_perror(APLOG_MARK, APLOG_ALERT|APLOG_STARTUP, 0, a,
1886                  "%s: Could not determine the server's fully qualified "
1887                  "domain name, using %s for ServerName",
1888                  ap_server_argv0, server_hostname);
1889
1890     return server_hostname;
1891 }
1892
1893 /* simple 'pool' alloc()ing glue to apr_base64.c
1894  */
1895 AP_DECLARE(char *) ap_pbase64decode(apr_pool_t *p, const char *bufcoded)
1896 {
1897     char *decoded;
1898     int l;
1899
1900     decoded = (char *) apr_palloc(p, 1 + apr_base64_decode_len(bufcoded));
1901     l = apr_base64_decode(decoded, bufcoded);
1902     decoded[l] = '\0'; /* make binary sequence into string */
1903
1904     return decoded;
1905 }
1906
1907 AP_DECLARE(char *) ap_pbase64encode(apr_pool_t *p, char *string)
1908 {
1909     char *encoded;
1910     int l = strlen(string);
1911
1912     encoded = (char *) apr_palloc(p, 1 + apr_base64_encode_len(l));
1913     l = apr_base64_encode(encoded, string, l);
1914     encoded[l] = '\0'; /* make binary sequence into string */
1915
1916     return encoded;
1917 }
1918
1919 /* we want to downcase the type/subtype for comparison purposes
1920  * but nothing else because ;parameter=foo values are case sensitive.
1921  * XXX: in truth we want to downcase parameter names... but really,
1922  * apache has never handled parameters and such correctly.  You
1923  * also need to compress spaces and such to be able to compare
1924  * properly. -djg
1925  */
1926 AP_DECLARE(void) ap_content_type_tolower(char *str)
1927 {
1928     char *semi;
1929
1930     semi = strchr(str, ';');
1931     if (semi) {
1932         *semi = '\0';
1933     }
1934     while (*str) {
1935         *str = apr_tolower(*str);
1936         ++str;
1937     }
1938     if (semi) {
1939         *semi = ';';
1940     }
1941 }
1942
1943 /*
1944  * Given a string, replace any bare " with \" .
1945  */
1946 AP_DECLARE(char *) ap_escape_quotes(apr_pool_t *p, const char *instring)
1947 {
1948     int newlen = 0;
1949     const char *inchr = instring;
1950     char *outchr, *outstring;
1951
1952     /*
1953      * Look through the input string, jogging the length of the output
1954      * string up by an extra byte each time we find an unescaped ".
1955      */
1956     while (*inchr != '\0') {
1957         newlen++;
1958         if (*inchr == '"') {
1959             newlen++;
1960         }
1961         /*
1962          * If we find a slosh, and it's not the last byte in the string,
1963          * it's escaping something - advance past both bytes.
1964          */
1965         if ((*inchr == '\\') && (inchr[1] != '\0')) {
1966             inchr++;
1967             newlen++;
1968         }
1969         inchr++;
1970     }
1971     outstring = apr_palloc(p, newlen + 1);
1972     inchr = instring;
1973     outchr = outstring;
1974     /*
1975      * Now copy the input string to the output string, inserting a slosh
1976      * in front of every " that doesn't already have one.
1977      */
1978     while (*inchr != '\0') {
1979         if ((*inchr == '\\') && (inchr[1] != '\0')) {
1980             *outchr++ = *inchr++;
1981             *outchr++ = *inchr++;
1982         }
1983         if (*inchr == '"') {
1984             *outchr++ = '\\';
1985         }
1986         if (*inchr != '\0') {
1987             *outchr++ = *inchr++;
1988         }
1989     }
1990     *outchr = '\0';
1991     return outstring;
1992 }