]> granicus.if.org Git - php/commitdiff
Added a much faster parse_url() implementation. This also fixes a number
authorIlia Alshanetsky <iliaa@php.net>
Sun, 6 Oct 2002 16:14:42 +0000 (16:14 +0000)
committerIlia Alshanetsky <iliaa@php.net>
Sun, 6 Oct 2002 16:14:42 +0000 (16:14 +0000)
of bugs in the old parse_url() implementation.

ext/standard/url.c

index 6312d338c65c330b1852beeb2e65babf271d5c88..6a7153ce3c487e1746dc3eb32c58d59a0595f62c 100644 (file)
@@ -85,107 +85,143 @@ PHPAPI char *php_replace_controlchars(char *str)
  */
 PHPAPI php_url *php_url_parse(char *str)
 {
-       regex_t re;
-       regmatch_t subs[11];
-       int err;
        int length = strlen(str);
-       char *result;
+       char port_buf[5];
        php_url *ret = ecalloc(1, sizeof(php_url));
+       char *s, *e, *p, *pp, *ue;
+               
+       s = str;
+       ue = s + length;
 
-       /* from Appendix B of draft-fielding-url-syntax-09,
-          http://www.ics.uci.edu/~fielding/url/url.txt */
-       err = regcomp(&re, "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?", REG_EXTENDED);
-       if (err) {
-               /*php_error(E_WARNING, "Unable to compile regex: %d\n", err);*/
-               efree(ret);
-               return NULL;
-       }
-       err = regexec(&re, str, 10, subs, 0);
-       if (err) {
-               /*php_error(E_WARNING, "Error with regex\n");*/
-               efree(ret);
-               regfree(&re);
-               return NULL;
-       }
-       /* no processing necessary on the scheme */
-       if (subs[2].rm_so != -1 && subs[2].rm_so <= length) {
-               ret->scheme = estrndup(str + subs[2].rm_so, subs[2].rm_eo - subs[2].rm_so);
+       /* parse scheme */
+       if ((e = strchr(s, ':')) && *(e+1) == '/' && *(e+2) == '/' && (e-s)) {
+               ret->scheme = estrndup(s, (e-s));
                php_replace_controlchars(ret->scheme);
-       }
-
-       /* the path to the resource */
-       if (subs[5].rm_so != -1 && subs[5].rm_so <= length) {
-               ret->path = estrndup(str + subs[5].rm_so, subs[5].rm_eo - subs[5].rm_so);
+               s = e + 3;
+       } else if (e) { /* no scheme, look for port */
+               p = e + 1;
+               pp = p;
+               
+               while (pp-p < 6 && isdigit(*pp)) {
+                       pp++;
+               }
+               
+               if (pp-p < 6 && (*pp == '/' || *pp == '\0')) {
+                       memcpy(port_buf, p, (pp-p));
+                       port_buf[pp-p] = '\0';
+                       ret->port = atoi(port_buf);
+               } else {
+                       goto just_path;
+               }
+       } else {
+               just_path:
+               ret->path = estrndup(str, length);
                php_replace_controlchars(ret->path);
+               return ret;
        }
-
-       /* the query part */
-       if (subs[7].rm_so != -1 && subs[7].rm_so <= length) {
-               ret->query = estrndup(str + subs[7].rm_so, subs[7].rm_eo - subs[7].rm_so);
-               php_replace_controlchars(ret->query);
-       }
-
-       /* the fragment */
-       if (subs[9].rm_so != -1 && subs[9].rm_so <= length) {
-               ret->fragment = estrndup(str + subs[9].rm_so, subs[9].rm_eo - subs[9].rm_so);
-               php_replace_controlchars(ret->fragment);
+       
+       if (!(e = strchr(s, '/'))) {
+               e = ue;
        }
 
-       /* extract the username, pass, and port from the hostname */
-       if (subs[4].rm_so != -1 && subs[4].rm_so <= length) {
-
-               int cerr;
-               /* extract username:pass@host:port from regex results */
-               result = estrndup(str + subs[4].rm_so, subs[4].rm_eo - subs[4].rm_so);
-               length = strlen(result);
-
-               regfree(&re);                   /* free the old regex */
+       /* check for login and password */
+       if ((p = memchr(s, '@', (e-s)))) {
+               if ((pp = memchr(s, ':', (p-s)))) {
+                       if ((pp-s) > 0) {
+                               ret->user = estrndup(s, (pp-s));
+                               php_replace_controlchars(ret->user);
+                       }       
                
-               if (length) {
-                       if ((cerr=regcomp(&re, "^(([^@:]+)(:([^@:]+))?@)?((\\[([^]]+)\\])|([^:@]+))(:([^:@]+))?", REG_EXTENDED))
-                               || (err=regexec(&re, result, 11, subs, 0))) {
+                       if (p-pp > 1) { 
+                               ret->pass = estrndup(++pp, (p-pp-1));
+                               php_replace_controlchars(ret->pass);
+                       }       
+               }
+               
+               s = p + 1;
+       }
+       
+       /* check for port */
+       if ((p = memchr(s, ':', (e-s)))) {
+               if (!ret->port) {
+                       p++;
+                       if ( e-p > 5 || e-p < 1 ) { /* port cannot be longer then 5 characters */
                                STR_FREE(ret->scheme);
-                               STR_FREE(ret->path);
-                               STR_FREE(ret->query);
-                               STR_FREE(ret->fragment);
+                               STR_FREE(ret->user);
+                               STR_FREE(ret->pass);
                                efree(ret);
-                               efree(result);
-                               /*php_error(E_WARNING, "Unable to compile regex: %d\n", err);*/
-                               if (!cerr) regfree(&re); 
                                return NULL;
                        }
-                       /* now deal with all of the results */
-                       if (subs[2].rm_so != -1 && subs[2].rm_so < length) {
-                               ret->user = estrndup(result + subs[2].rm_so, subs[2].rm_eo - subs[2].rm_so);
-                               php_replace_controlchars(ret->user);
-                       }
-                       if (subs[4].rm_so != -1 && subs[4].rm_so < length) {
-                               ret->pass = estrndup(result + subs[4].rm_so, subs[4].rm_eo - subs[4].rm_so);
-                               php_replace_controlchars(ret->pass);
-                       }
-                       if (subs[7].rm_so != -1 && subs[7].rm_so < length) {
-                               ret->host = estrndup(result + subs[7].rm_so, subs[7].rm_eo - subs[7].rm_so);
-                               php_replace_controlchars(ret->host);
-                       } else if (subs[8].rm_so != -1 && subs[8].rm_so < length) {
-                               ret->host = estrndup(result + subs[8].rm_so, subs[8].rm_eo - subs[8].rm_so);
-                               php_replace_controlchars(ret->host);
-                       }
-                       if (subs[10].rm_so != -1 && subs[10].rm_so < length) {
-                               ret->port = (unsigned short) strtol(result + subs[10].rm_so, NULL, 10);
-                       }
-               }
-               efree(result);
+               
+                       memcpy(port_buf, p, (e-p));
+                       port_buf[e-p] = '\0';
+                       ret->port = atoi(port_buf);
+                       p--;
+               }       
+       } else {
+               p = e;
        }
-       else if (ret->scheme && !strcmp(ret->scheme, "http")) {
+       
+       /* check if we have a valid host, if we don't reject the string as url */
+       if ((p-s) < 1) {
                STR_FREE(ret->scheme);
-               STR_FREE(ret->path);
-               STR_FREE(ret->query);
-               STR_FREE(ret->fragment);
+               STR_FREE(ret->user);
+               STR_FREE(ret->pass);
                efree(ret);
-               regfree(&re);
                return NULL;
        }
-       regfree(&re);
+       
+       ret->host = estrndup(s, (p-s));
+       php_replace_controlchars(ret->host);
+       
+       if (e == ue) {
+               return ret;
+       }
+       
+       s = e;
+       
+       if ((p = strchr(s, '?'))) {
+               pp = strchr(s, '#');
+               
+               if (pp && pp < p) {
+                       p = pp;
+                       pp = strchr(pp+2, '#');
+               }
+       
+               if (p - s) {
+                       ret->path = estrndup(s, (p-s));
+                       php_replace_controlchars(ret->path);
+               }       
+       
+               if (pp) {
+                       if (pp - ++p) { 
+                               ret->query = estrndup(p, (pp-p));
+                               php_replace_controlchars(ret->query);
+                       }
+                       p = pp;
+                       goto label_parse;
+               } else if (++p - ue) {
+                       ret->query = estrndup(p, (ue-p));
+                       php_replace_controlchars(ret->query);
+               }
+       } else if ((p = strchr(s, '#'))) {
+               if (p - s) {
+                       ret->path = estrndup(s, (p-s));
+                       php_replace_controlchars(ret->path);
+               }       
+               
+               label_parse:
+               p++;
+               
+               if (ue - p) {
+                       ret->fragment = estrndup(p, (ue-p));
+                       php_replace_controlchars(ret->fragment);
+               }       
+       } else {
+               ret->path = estrndup(s, (ue-s));
+               php_replace_controlchars(ret->path);
+       }
+
        return ret;
 }
 /* }}} */