granicus.if.org Git - apache/blob - server/util_uri.c

   1 /* ====================================================================
   2  * The Apache Software License, Version 1.1
   3  *
   4  * Copyright (c) 2000 The Apache Software Foundation.  All rights
   5  * reserved.
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions
   9  * are met:
  10  *
  11  * 1. Redistributions of source code must retain the above copyright
  12  *    notice, this list of conditions and the following disclaimer.
  13  *
  14  * 2. Redistributions in binary form must reproduce the above copyright
  15  *    notice, this list of conditions and the following disclaimer in
  16  *    the documentation and/or other materials provided with the
  17  *    distribution.
  18  *
  19  * 3. The end-user documentation included with the redistribution,
  20  *    if any, must include the following acknowledgment:
  21  *       "This product includes software developed by the
  22  *        Apache Software Foundation (http://www.apache.org/)."
  23  *    Alternately, this acknowledgment may appear in the software itself,
  24  *    if and wherever such third-party acknowledgments normally appear.
  25  *
  26  * 4. The names "Apache" and "Apache Software Foundation" must
  27  *    not be used to endorse or promote products derived from this
  28  *    software without prior written permission. For written
  29  *    permission, please contact apache@apache.org.
  30  *
  31  * 5. Products derived from this software may not be called "Apache",
  32  *    nor may "Apache" appear in their name, without prior written
  33  *    permission of the Apache Software Foundation.
  34  *
  35  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  36  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  37  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  38  * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  39  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  40  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  41  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  42  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  43  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  44  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  45  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  46  * SUCH DAMAGE.
  47  * ====================================================================
  48  *
  49  * This software consists of voluntary contributions made by many
  50  * individuals on behalf of the Apache Software Foundation.  For more
  51  * information on the Apache Software Foundation, please see
  52  * <http://www.apache.org/>.
  53  *
  54  * Portions of this software are based upon public domain software
  55  * originally written at the National Center for Supercomputing Applications,
  56  * University of Illinois, Urbana-Champaign.
  57  */
  58
  59 /*
  60  * util_uri.c: URI related utility things
  61  *
  62  */
  63
  64 #include "ap_config.h"
  65 #include "apr_strings.h"
  66 #include "httpd.h"
  67 #include "http_log.h"
  68 #include "util_uri.h"
  69 #ifdef HAVE_STRING_H
  70 #include <string.h>
  71 #endif
  72 #ifdef HAVE_STRINGS_H
  73 #include <strings.h>
  74 #endif
  75 #ifdef HAVE_NETDB_H
  76 #include <netdb.h>
  77 #endif
  78
  79 /* Some WWW schemes and their default ports; this is basically /etc/services */
  80 /* This will become global when the protocol abstraction comes */
  81 /* As the schemes are searched by a linear search, */
  82 /* they are sorted by their expected frequency */
  83 static schemes_t schemes[] =
  84 {
  85     {"http",   DEFAULT_HTTP_PORT},
  86     {"ftp",    DEFAULT_FTP_PORT},
  87     {"https",  DEFAULT_HTTPS_PORT},
  88     {"gopher", DEFAULT_GOPHER_PORT},
  89     {"wais",   DEFAULT_WAIS_PORT},
  90     {"nntp",   DEFAULT_NNTP_PORT},
  91     {"snews",  DEFAULT_SNEWS_PORT},
  92     {"prospero", DEFAULT_PROSPERO_PORT},
  93     { NULL, 0xFFFF }                    /* unknown port */
  94 };
  95
  96
  97 API_EXPORT(unsigned short) ap_default_port_for_scheme(const char *scheme_str)
  98 {
  99     schemes_t *scheme;
 100
 101     for (scheme = schemes; scheme->name != NULL; ++scheme)
 102         if (strcasecmp(scheme_str, scheme->name) == 0)
 103             return scheme->default_port;
 104
 105     return 0;
 106 }
 107
 108 API_EXPORT(unsigned short) ap_default_port_for_request(const request_rec *r)
 109 {
 110     return (r->parsed_uri.scheme)
 111         ? ap_default_port_for_scheme(r->parsed_uri.scheme)
 112         : 0;
 113 }
 114
 115 /* Create a copy of a "struct hostent" record; it was presumably returned
 116  * from a call to gethostbyname() and lives in static storage.
 117  * By creating a copy we can tuck it away for later use.
 118  */
 119 API_EXPORT(struct hostent *) ap_pduphostent(apr_pool_t *p, const struct hostent *hp)
 120 {
 121     struct hostent *newent;
 122     char          **ptrs;
 123     char          **aliases;
 124     struct in_addr *addrs;
 125     int            i = 0, j = 0;
 126
 127     if (hp == NULL)
 128         return NULL;
 129
 130     /* Count number of alias entries */
 131     if (hp->h_aliases != NULL)
 132         for (; hp->h_aliases[j] != NULL; ++j)
 133             continue;
 134
 135     /* Count number of in_addr entries */
 136     if (hp->h_addr_list != NULL)
 137         for (; hp->h_addr_list[i] != NULL; ++i)
 138             continue;
 139
 140     /* Allocate hostent structure, alias ptrs, addr ptrs, addrs */
 141     newent = (struct hostent *) apr_palloc(p, sizeof(*hp));
 142     aliases = (char **) apr_palloc(p, (j+1) * sizeof(char*));
 143     ptrs = (char **) apr_palloc(p, (i+1) * sizeof(char*));
 144     addrs  = (struct in_addr *) apr_palloc(p, (i+1) * sizeof(struct in_addr));
 145
 146     *newent = *hp;
 147     newent->h_name = apr_pstrdup(p, hp->h_name);
 148     newent->h_aliases = aliases;
 149     newent->h_addr_list = (char**) ptrs;
 150
 151     /* Copy Alias Names: */
 152     for (j = 0; hp->h_aliases[j] != NULL; ++j) {
 153        aliases[j] = apr_pstrdup(p, hp->h_aliases[j]);
 154     }
 155     aliases[j] = NULL;
 156
 157     /* Copy address entries */
 158     for (i = 0; hp->h_addr_list[i] != NULL; ++i) {
 159         ptrs[i] = (char*) &addrs[i];
 160         addrs[i] = *(struct in_addr *) hp->h_addr_list[i];
 161     }
 162     ptrs[i] = NULL;
 163
 164     return newent;
 165 }
 166
 167
 168 /* pgethostbyname(): resolve hostname, if successful return an ALLOCATED
 169  * COPY OF the hostent structure, intended to be stored and used later.
 170  * (gethostbyname() uses static storage that would be overwritten on each call)
 171  */
 172 API_EXPORT(struct hostent *) ap_pgethostbyname(apr_pool_t *p, const char *hostname)
 173 {
 174     struct hostent *hp = gethostbyname(hostname);
 175     return (hp == NULL) ? NULL : ap_pduphostent(p, hp);
 176 }
 177
 178
 179 /* Unparse a uri_components structure to an URI string.
 180  * Optionally suppress the password for security reasons.
 181  */
 182 API_EXPORT(char *) ap_unparse_uri_components(apr_pool_t *p, const uri_components *uptr, unsigned flags)
 183 {
 184     char *ret = "";
 185
 186     /* If suppressing the site part, omit both user name & scheme://hostname */
 187     if (!(flags & UNP_OMITSITEPART)) {
 188
 189         /* Construct a "user:password@" string, honoring the passed UNP_ flags: */
 190         if (uptr->user||uptr->password)
 191             ret = apr_pstrcat (p,
 192                         (uptr->user     && !(flags & UNP_OMITUSER)) ? uptr->user : "",
 193                         (uptr->password && !(flags & UNP_OMITPASSWORD)) ? ":" : "",
 194                         (uptr->password && !(flags & UNP_OMITPASSWORD))
 195                            ? ((flags & UNP_REVEALPASSWORD) ? uptr->password : "XXXXXXXX")
 196                            : "",
 197                         "@", NULL);
 198
 199         /* Construct scheme://site string */
 200         if (uptr->hostname) {
 201             int is_default_port;
 202
 203             is_default_port =
 204                 (uptr->port_str == NULL ||
 205                  uptr->port == 0 ||
 206                  uptr->port == ap_default_port_for_scheme(uptr->scheme));
 207
 208             ret = apr_pstrcat (p,
 209                         uptr->scheme, "://", ret,
 210                         uptr->hostname ? uptr->hostname : "",
 211                         is_default_port ? "" : ":",
 212                         is_default_port ? "" : uptr->port_str,
 213                         NULL);
 214         }
 215     }
 216
 217     /* Should we suppress all path info? */
 218     if (!(flags & UNP_OMITPATHINFO)) {
 219         /* Append path, query and fragment strings: */
 220         ret = apr_pstrcat (p,
 221                 ret,
 222                 uptr->path ? uptr->path : "",
 223                 (uptr->query    && !(flags & UNP_OMITQUERY)) ? "?" : "",
 224                 (uptr->query    && !(flags & UNP_OMITQUERY)) ? uptr->query : "",
 225                 (uptr->fragment && !(flags & UNP_OMITQUERY)) ? "#" : NULL,
 226                 (uptr->fragment && !(flags & UNP_OMITQUERY)) ? uptr->fragment : NULL,
 227                 NULL);
 228     }
 229     return ret;
 230 }
 231
 232 /* The regex version of parse_uri_components has the advantage that it is
 233  * relatively easy to understand and extend.  But it has the disadvantage
 234  * that the regexes are complex enough that regex libraries really
 235  * don't do a great job with them performancewise.
 236  *
 237  * The default is a hand coded scanner that is two orders of magnitude
 238  * faster.
 239  */
 240 #ifdef UTIL_URI_REGEX
 241
 242 static regex_t re_uri;
 243 static regex_t re_hostpart;
 244
 245 API_EXPORT(void) ap_util_uri_init(void)
 246 {
 247     int ret;
 248     const char *re_str;
 249
 250     /* This is a modified version of the regex that appeared in
 251      * draft-fielding-uri-syntax-01.  It doesnt allow the uri to contain a
 252      * scheme but no hostinfo or vice versa.
 253      *
 254      * draft-fielding-uri-syntax-01.txt, section 4.4 tells us:
 255      *
 256      *      Although the BNF defines what is allowed in each component, it is
 257      *      ambiguous in terms of differentiating between a site component and
 258      *      a path component that begins with two slash characters.
 259      *
 260      * RFC2068 disambiguates this for the Request-URI, which may only ever be
 261      * the "abs_path" portion of the URI.  So a request "GET //foo/bar
 262      * HTTP/1.1" is really referring to the path //foo/bar, not the host foo,
 263      * path /bar.  Nowhere in RFC2068 is it possible to have a scheme but no
 264      * hostinfo or a hostinfo but no scheme.  (Unless you're proxying a
 265      * protocol other than HTTP, but this parsing engine probably won't work
 266      * for other protocols.)
 267      *
 268      *         12            3          4       5   6        7 8 */
 269     re_str = "^(([^:/?#]+)://([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?$";
 270     /*          ^scheme--^   ^site---^  ^path--^   ^query^    ^frag */
 271     if ((ret = regcomp(&re_uri, re_str, REG_EXTENDED)) != 0) {
 272         char line[1024];
 273
 274         /* Make a readable error message */
 275         ret = regerror(ret, &re_uri, line, sizeof line);
 276         ap_log_error(APLOG_MARK, APLOG_NOERRNO|APLOG_ERR, 0, NULL,
 277                 "Internal error: regcomp(\"%s\") returned non-zero (%s) - "
 278                 "possibly due to broken regex lib! "
 279                 "Did you define WANTHSREGEX=yes?",
 280                 re_str, line);
 281
 282         exit(1);
 283     }
 284
 285     /* This is a sub-RE which will break down the hostinfo part,
 286      * i.e., user, password, hostname and port.
 287      * $          12      3 4        5       6 7    */
 288     re_str    = "^(([^:]*)(:(.*))?@)?([^@:]*)(:([0-9]*))?$";
 289     /*             ^^user^ :pw        ^host^ ^:[port]^ */
 290     if ((ret = regcomp(&re_hostpart, re_str, REG_EXTENDED)) != 0) {
 291         char line[1024];
 292
 293         /* Make a readable error message */
 294         ret = regerror(ret, &re_hostpart, line, sizeof line);
 295         ap_log_error(APLOG_MARK, APLOG_NOERRNO|APLOG_ERR, 0, NULL,
 296                 "Internal error: regcomp(\"%s\") returned non-zero (%s) - "
 297                 "possibly due to broken regex lib! "
 298                 "Did you define WANTHSREGEX=yes?",
 299                 re_str, line);
 300
 301         exit(1);
 302     }
 303 }
 304
 305
 306 /* parse_uri_components():
 307  * Parse a given URI, fill in all supplied fields of a uri_components
 308  * structure. This eliminates the necessity of extracting host, port,
 309  * path, query info repeatedly in the modules.
 310  * Side effects:
 311  *  - fills in fields of uri_components *uptr
 312  *  - none on any of the r->* fields
 313  */
 314 API_EXPORT(int) ap_parse_uri_components(apr_pool_t *p, const char *uri, uri_components *uptr)
 315 {
 316     int ret;
 317     regmatch_t match[10];       /* This must have at least as much elements
 318                                 * as there are braces in the re_strings */
 319
 320     ap_assert (uptr != NULL);
 321
 322     /* Initialize the structure. parse_uri() and parse_uri_components()
 323      * can be called more than once per request.
 324      */
 325     memset (uptr, '\0', sizeof(*uptr));
 326     uptr->is_initialized = 1;
 327
 328     ret = ap_regexec(&re_uri, uri, re_uri.re_nsub + 1, match, 0);
 329
 330     if (ret != 0) {
 331         ap_log_error(APLOG_MARK, APLOG_NOERRNO|APLOG_ERR, 0, NULL,
 332                     "ap_regexec() could not parse uri (\"%s\")",
 333                     uri);
 334
 335         return HTTP_BAD_REQUEST;
 336     }
 337
 338     if (match[2].rm_so != match[2].rm_eo)
 339         uptr->scheme = apr_pstrndup (p, uri+match[2].rm_so, match[2].rm_eo - match[2].rm_so);
 340
 341     /* empty hostinfo is valid, that's why we test $1 but use $3 */
 342     if (match[1].rm_so != match[1].rm_eo)
 343         uptr->hostinfo = apr_pstrndup (p, uri+match[3].rm_so, match[3].rm_eo - match[3].rm_so);
 344
 345     if (match[4].rm_so != match[4].rm_eo)
 346         uptr->path = apr_pstrndup (p, uri+match[4].rm_so, match[4].rm_eo - match[4].rm_so);
 347
 348     /* empty query string is valid, that's why we test $5 but use $6 */
 349     if (match[5].rm_so != match[5].rm_eo)
 350         uptr->query = apr_pstrndup (p, uri+match[6].rm_so, match[6].rm_eo - match[6].rm_so);
 351
 352     /* empty fragment is valid, test $7 use $8 */
 353     if (match[7].rm_so != match[7].rm_eo)
 354         uptr->fragment = apr_pstrndup (p, uri+match[8].rm_so, match[8].rm_eo - match[8].rm_so);
 355
 356     if (uptr->hostinfo) {
 357         /* Parse the hostinfo part to extract user, password, host, and port */
 358         ret = ap_regexec(&re_hostpart, uptr->hostinfo, re_hostpart.re_nsub + 1, match, 0);
 359         if (ret != 0) {
 360             ap_log_error(APLOG_MARK, APLOG_NOERRNO|APLOG_ERR, 0, NULL,
 361                     "ap_regexec() could not parse (\"%s\") as host part",
 362                     uptr->hostinfo);
 363
 364             return HTTP_BAD_REQUEST;
 365         }
 366
 367         /* $      12      3 4        5       6 7            */
 368         /*      "^(([^:]*)(:(.*))?@)?([^@:]*)(:([0-9]*))?$" */
 369         /*         ^^user^ :pw        ^host^ ^:[port]^      */
 370
 371         /* empty user is valid, that's why we test $1 but use $2 */
 372         if (match[1].rm_so != match[1].rm_eo)
 373             uptr->user = apr_pstrndup (p, uptr->hostinfo+match[2].rm_so, match[2].rm_eo - match[2].rm_so);
 374
 375         /* empty password is valid, test $3 but use $4 */
 376         if (match[3].rm_so != match[3].rm_eo)
 377             uptr->password = apr_pstrndup (p, uptr->hostinfo+match[4].rm_so, match[4].rm_eo - match[4].rm_so);
 378
 379         /* empty hostname is valid, and implied by the existence of hostinfo */
 380         uptr->hostname = apr_pstrndup (p, uptr->hostinfo+match[5].rm_so, match[5].rm_eo - match[5].rm_so);
 381
 382         if (match[6].rm_so != match[6].rm_eo) {
 383             /* Note that the port string can be empty.
 384              * If it is, we use the default port associated with the scheme
 385              */
 386             uptr->port_str = apr_pstrndup (p, uptr->hostinfo+match[7].rm_so, match[7].rm_eo - match[7].rm_so);
 387             if (uptr->port_str[0] != '\0') {
 388                 char *endstr;
 389                 int port;
 390
 391                 port = strtol(uptr->port_str, &endstr, 10);
 392                 uptr->port = port;
 393                 if (*endstr != '\0') {
 394                     /* Invalid characters after ':' found */
 395                     return HTTP_BAD_REQUEST;
 396                 }
 397             }
 398             else {
 399                 uptr->port = uptr->scheme ? ap_default_port_for_scheme(uptr->scheme) : DEFAULT_HTTP_PORT;
 400             }
 401         }
 402     }
 403
 404     if (ret == 0)
 405         ret = HTTP_OK;
 406     return ret;
 407 }
 408 #else
 409
 410 /* Here is the hand-optimized parse_uri_components().  There are some wild
 411  * tricks we could pull in assembly language that we don't pull here... like we
 412  * can do word-at-time scans for delimiter characters using the same technique
 413  * that fast memchr()s use.  But that would be way non-portable. -djg
 414  */
 415
/* We have a table that we can index by character and it tells us if the
 * character is one of the interesting delimiters.  Note that we even get
 * compares for NUL for free -- it's just another delimiter.
 */
 420
 421 #define T_COLON         0x01    /* ':' */
 422 #define T_SLASH         0x02    /* '/' */
 423 #define T_QUESTION      0x04    /* '?' */
 424 #define T_HASH          0x08    /* '#' */
 425 #define T_NUL           0x80    /* '\0' */
 426
 427 /* the uri_delims.h file is autogenerated by gen_uri_delims.c */
 428 #include "uri_delims.h"
 429
/* it works like this:
    if ((uri_delims[ch] & NOTEND_foobar) == 0) {
        then we're not at a delimiter for foobar
    }
*/
 435
 436 /* Note that we optimize the scheme scanning here, we cheat and let the
 437  * compiler know that it doesn't have to do the & masking.
 438  */
 439 #define NOTEND_SCHEME   (0xff)
 440 #define NOTEND_HOSTINFO (T_SLASH | T_QUESTION | T_HASH | T_NUL)
 441 #define NOTEND_PATH     (T_QUESTION | T_HASH | T_NUL)
 442
/* Initialization hook for the URI utilities.  This build uses the
 * hand-coded scanner (UTIL_URI_REGEX is not defined), so the function
 * body is empty.
 */
API_EXPORT(void) ap_util_uri_init(void)
{
    /* nothing to do */
}
 447
 448 /* parse_uri_components():
 449  * Parse a given URI, fill in all supplied fields of a uri_components
 450  * structure. This eliminates the necessity of extracting host, port,
 451  * path, query info repeatedly in the modules.
 452  * Side effects:
 453  *  - fills in fields of uri_components *uptr
 454  *  - none on any of the r->* fields
 455  */
 456 API_EXPORT(int) ap_parse_uri_components(apr_pool_t *p, const char *uri, uri_components *uptr)
 457 {
 458     const char *s;
 459     const char *s1;
 460     const char *hostinfo;
 461     char *endstr;
 462     int port;
 463
 464     /* Initialize the structure. parse_uri() and parse_uri_components()
 465      * can be called more than once per request.
 466      */
 467     memset (uptr, '\0', sizeof(*uptr));
 468     uptr->is_initialized = 1;
 469
 470     /* We assume the processor has a branch predictor like most --
 471      * it assumes forward branches are untaken and backwards are taken.  That's
 472      * the reason for the gotos.  -djg
 473      */
 474     if (uri[0] == '/') {
 475 deal_with_path:
 476         /* we expect uri to point to first character of path ... remember
 477          * that the path could be empty -- http://foobar?query for example
 478          */
 479         s = uri;
 480         while ((uri_delims[*(unsigned char *)s] & NOTEND_PATH) == 0) {
 481             ++s;
 482         }
 483         if (s != uri) {
 484             uptr->path = apr_pstrndup(p, uri, s - uri);
 485         }
 486         if (*s == 0) {
 487             return HTTP_OK;
 488         }
 489         if (*s == '?') {
 490             ++s;
 491             s1 = ap_strchr_c(s, '#');
 492             if (s1) {
 493                 uptr->fragment = apr_pstrdup(p, s1 + 1);
 494                 uptr->query = apr_pstrndup(p, s, s1 - s);
 495             }
 496             else {
 497                 uptr->query = apr_pstrdup(p, s);
 498             }
 499             return HTTP_OK;
 500         }
 501         /* otherwise it's a fragment */
 502         uptr->fragment = apr_pstrdup(p, s + 1);
 503         return HTTP_OK;
 504     }
 505
 506     /* find the scheme: */
 507     s = uri;
 508     while ((uri_delims[*(unsigned char *)s] & NOTEND_SCHEME) == 0) {
 509         ++s;
 510     }
 511     /* scheme must be non-empty and followed by :// */
 512     if (s == uri || s[0] != ':' || s[1] != '/' || s[2] != '/') {
 513         goto deal_with_path;    /* backwards predicted taken! */
 514     }
 515
 516     uptr->scheme = apr_pstrndup(p, uri, s - uri);
 517     s += 3;
 518     hostinfo = s;
 519     while ((uri_delims[*(unsigned char *)s] & NOTEND_HOSTINFO) == 0) {
 520         ++s;
 521     }
 522     uri = s;    /* whatever follows hostinfo is start of uri */
 523     uptr->hostinfo = apr_pstrndup(p, hostinfo, uri - hostinfo);
 524
 525     /* If there's a username:password@host:port, the @ we want is the last @...
 526      * too bad there's no memrchr()... For the C purists, note that hostinfo
 527      * is definately not the first character of the original uri so therefore
 528      * &hostinfo[-1] < &hostinfo[0] ... and this loop is valid C.
 529      */
 530     do {
 531         --s;
 532     } while (s >= hostinfo && *s != '@');
 533     if (s < hostinfo) {
 534         /* again we want the common case to be fall through */
 535 deal_with_host:
 536         /* We expect hostinfo to point to the first character of
 537          * the hostname.  If there's a port it is the first colon.
 538          */
 539         s = memchr(hostinfo, ':', uri - hostinfo);
 540         if (s == NULL) {
 541             /* we expect the common case to have no port */
 542             uptr->hostname = apr_pstrndup(p, hostinfo, uri - hostinfo);
 543             goto deal_with_path;
 544         }
 545         uptr->hostname = apr_pstrndup(p, hostinfo, s - hostinfo);
 546         ++s;
 547         uptr->port_str = apr_pstrndup(p, s, uri - s);
 548         if (uri != s) {
 549             port = strtol(uptr->port_str, &endstr, 10);
 550             uptr->port = port;
 551             if (*endstr == '\0') {
 552                 goto deal_with_path;
 553             }
 554             /* Invalid characters after ':' found */
 555             return HTTP_BAD_REQUEST;
 556         }
 557         uptr->port = ap_default_port_for_scheme(uptr->scheme);
 558         goto deal_with_path;
 559     }
 560
 561     /* first colon delimits username:password */
 562     s1 = memchr(hostinfo, ':', s - hostinfo);
 563     if (s1) {
 564         uptr->user = apr_pstrndup(p, hostinfo, s1 - hostinfo);
 565         ++s1;
 566         uptr->password = apr_pstrndup(p, s1, s - s1);
 567     }
 568     else {
 569         uptr->user = apr_pstrndup(p, hostinfo, s - hostinfo);
 570     }
 571     hostinfo = s + 1;
 572     goto deal_with_host;
 573 }
 574
 575 /* Special case for CONNECT parsing: it comes with the hostinfo part only */
 576 /* See the INTERNET-DRAFT document "Tunneling SSL Through a WWW Proxy"
 577  * currently at http://www.mcom.com/newsref/std/tunneling_ssl.html
 578  * for the format of the "CONNECT host:port HTTP/1.0" request
 579  */
 580 API_EXPORT(int) ap_parse_hostinfo_components(apr_pool_t *p, const char *hostinfo, uri_components *uptr)
 581 {
 582     const char *s;
 583     char *endstr;
 584
 585     /* Initialize the structure. parse_uri() and parse_uri_components()
 586      * can be called more than once per request.
 587      */
 588     memset (uptr, '\0', sizeof(*uptr));
 589     uptr->is_initialized = 1;
 590     uptr->hostinfo = apr_pstrdup(p, hostinfo);
 591
 592     /* We expect hostinfo to point to the first character of
 593      * the hostname.  There must be a port, separated by a colon
 594      */
 595     s = ap_strchr_c(hostinfo, ':');
 596     if (s == NULL) {
 597         return HTTP_BAD_REQUEST;
 598     }
 599     uptr->hostname = apr_pstrndup(p, hostinfo, s - hostinfo);
 600     ++s;
 601     uptr->port_str = apr_pstrdup(p, s);
 602     if (*s != '\0') {
 603         uptr->port = (unsigned short) strtol(uptr->port_str, &endstr, 10);
 604         if (*endstr == '\0') {
 605             return HTTP_OK;
 606         }
 607         /* Invalid characters after ':' found */
 608     }
 609     return HTTP_BAD_REQUEST;
 610 }
 611 #endif