granicus.if.org Git - python/commitdiff
Be explicit about scheme_chars -- string.letters is locale dependent
author Guido van Rossum <guido@python.org>
Tue, 19 Dec 2000 16:48:13 +0000 (16:48 +0000)
committer Guido van Rossum <guido@python.org>
Tue, 19 Dec 2000 16:48:13 +0000 (16:48 +0000)
so we can't use it.

While I'm at it, got rid of string module use.  (Found several new
hard special cases for a hypothetical conversion tool: from string
import join, find, rfind; and a local assignment "find=string.find".)

Lib/urlparse.py

index 929315e9630018183fb4ee3cadf6eff76af6b1c2..d927b7e777dcc404932fd6bc055420a4cda018d9 100644 (file)
@@ -4,10 +4,6 @@ See RFC 1808: "Relative Uniform Resource Locators", by R. Fielding,
 UC Irvine, June 1995.
 """
 
-# Standard/builtin Python modules
-import string
-from string import join, split, rfind
-
 # A classification of schemes ('' means apply by default)
 uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'wais', 'file',
                 'https', 'shttp',
@@ -31,7 +27,10 @@ uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news', 'nntp', 'wais',
                 'file', 'prospero', '']
 
 # Characters valid in scheme names
-scheme_chars = string.letters + string.digits + '+-.'
+scheme_chars = ('abcdefghijklmnopqrstuvwxyz'
+                'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+                '0123456789'
+                '+-.')
 
 MAX_CACHE_SIZE = 20
 _parse_cache = {}
@@ -54,29 +53,28 @@ def urlparse(url, scheme = '', allow_fragments = 1):
                return cached
        if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth
                clear_cache()
-       find = string.find
        netloc = path = params = query = fragment = ''
-       i = find(url, ':')
+       i = url.find(':')
        if i > 0:
                if url[:i] == 'http': # optimize the common case
-                       scheme = string.lower(url[:i])
+                       scheme = url[:i].lower()
                        url = url[i+1:]
                        if url[:2] == '//':
-                               i = find(url, '/', 2)
+                               i = url.find('/', 2)
                                if i < 0:
                                        i = len(url)
                                netloc = url[2:i]
                                url = url[i:]
                        if allow_fragments:
-                               i = string.rfind(url, '#')
+                               i = url.rfind('#')
                                if i >= 0:
                                        fragment = url[i+1:]
                                        url = url[:i]
-                       i = find(url, '?')
+                       i = url.find('?')
                        if i >= 0:
                                query = url[i+1:]
                                url = url[:i]
-                       i = find(url, ';')
+                       i = url.find(';')
                        if i >= 0:
                                params = url[i+1:]
                                url = url[:i]
@@ -87,23 +85,23 @@ def urlparse(url, scheme = '', allow_fragments = 1):
                        if c not in scheme_chars:
                                break
                else:
-                       scheme, url = string.lower(url[:i]), url[i+1:]
+                       scheme, url = url[:i].lower(), url[i+1:]
        if scheme in uses_netloc:
                if url[:2] == '//':
-                       i = find(url, '/', 2)
+                       i = url.find('/', 2)
                        if i < 0:
                                i = len(url)
                        netloc, url = url[2:i], url[i:]
        if allow_fragments and scheme in uses_fragment:
-               i = string.rfind(url, '#')
+               i = url.rfind('#')
                if i >= 0:
                        url, fragment = url[:i], url[i+1:]
        if scheme in uses_query:
-               i = find(url, '?')
+               i = url.find('?')
                if i >= 0:
                        url, query = url[:i], url[i+1:]
        if scheme in uses_params:
-               i = find(url, ';')
+               i = url.find(';')
                if i >= 0:
                        url, params = url[:i], url[i+1:]
        tuple = scheme, netloc, url, params, query, fragment
@@ -151,7 +149,7 @@ def urljoin(base, url, allow_fragments = 1):
        if not path:
                return urlunparse((scheme, netloc, bpath,
                                   params, query or bquery, fragment))
-       segments = split(bpath, '/')[:-1] + split(path, '/')
+       segments = bpath.split('/')[:-1] + path.split('/')
        # XXX The stuff below is bogus in various ways...
        if segments[-1] == '.':
                segments[-1] = ''
@@ -171,7 +169,7 @@ def urljoin(base, url, allow_fragments = 1):
                segments[-1] = ''
        elif len(segments) >= 2 and segments[-1] == '..':
                segments[-2:] = ['']
-       return urlunparse((scheme, netloc, join(segments, '/'),
+       return urlunparse((scheme, netloc, '/'.join(segments),
                           params, query, fragment))
 
 def urldefrag(url):
@@ -236,7 +234,7 @@ def test():
        while 1:
                line = fp.readline()
                if not line: break
-               words = string.split(line)
+               words = line.split()
                if not words:
                        continue
                url = words[0]