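Be explicit about scheme_chars -- string.letters is locale-dependent, so spell out the ASCII letters and digits literally. Also replace string-module function calls (find, rfind, lower, split, join) with the equivalent string methods and drop the now-unused string imports.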

author Guido van Rossum <guido@python.org>

Tue, 19 Dec 2000 16:48:13 +0000 (16:48 +0000)

committer Guido van Rossum <guido@python.org>

Tue, 19 Dec 2000 16:48:13 +0000 (16:48 +0000)
author Guido van Rossum <guido@python.org>
Tue, 19 Dec 2000 16:48:13 +0000 (16:48 +0000)
committer Guido van Rossum <guido@python.org>
Tue, 19 Dec 2000 16:48:13 +0000 (16:48 +0000)
diff --git a/Lib/urlparse.py b/Lib/urlparse.py

index 929315e9630018183fb4ee3cadf6eff76af6b1c2..d927b7e777dcc404932fd6bc055420a4cda018d9 100644 (file)
--- a/Lib/urlparse.py
+++ b/Lib/urlparse.py
@@ -4,10 +4,6 @@ See RFC 1808: "Relative Uniform Resource Locators", by R. Fielding,
  UC Irvine, June 1995.
  """
  
-# Standard/builtin Python modules
-import string
-from string import join, split, rfind
-
  # A classification of schemes ('' means apply by default)
  uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'wais', 'file',
                  'https', 'shttp',
@@ -31,7 +27,10 @@ uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news', 'nntp', 'wais',
                  'file', 'prospero', '']
  
  # Characters valid in scheme names
-scheme_chars = string.letters + string.digits + '+-.'
+scheme_chars = ('abcdefghijklmnopqrstuvwxyz'
+                'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+                '0123456789'
+                '+-.')
  
  MAX_CACHE_SIZE = 20
  _parse_cache = {}
@@ -54,29 +53,28 @@ def urlparse(url, scheme = '', allow_fragments = 1):
                 return cached
         if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth
                 clear_cache()
-       find = string.find
         netloc = path = params = query = fragment = ''
-       i = find(url, ':')
+       i = url.find(':')
         if i > 0:
                 if url[:i] == 'http': # optimize the common case
-                       scheme = string.lower(url[:i])
+                       scheme = url[:i].lower()
                         url = url[i+1:]
                         if url[:2] == '//':
-                               i = find(url, '/', 2)
+                               i = url.find('/', 2)
                                 if i < 0:
                                         i = len(url)
                                 netloc = url[2:i]
                                 url = url[i:]
                         if allow_fragments:
-                               i = string.rfind(url, '#')
+                               i = url.rfind('#')
                                 if i >= 0:
                                         fragment = url[i+1:]
                                         url = url[:i]
-                       i = find(url, '?')
+                       i = url.find('?')
                         if i >= 0:
                                 query = url[i+1:]
                                 url = url[:i]
-                       i = find(url, ';')
+                       i = url.find(';')
                         if i >= 0:
                                 params = url[i+1:]
                                 url = url[:i]
@@ -87,23 +85,23 @@ def urlparse(url, scheme = '', allow_fragments = 1):
                         if c not in scheme_chars:
                                 break
                 else:
-                       scheme, url = string.lower(url[:i]), url[i+1:]
+                       scheme, url = url[:i].lower(), url[i+1:]
         if scheme in uses_netloc:
                 if url[:2] == '//':
-                       i = find(url, '/', 2)
+                       i = url.find('/', 2)
                         if i < 0:
                                 i = len(url)
                         netloc, url = url[2:i], url[i:]
         if allow_fragments and scheme in uses_fragment:
-               i = string.rfind(url, '#')
+               i = url.rfind('#')
                 if i >= 0:
                         url, fragment = url[:i], url[i+1:]
         if scheme in uses_query:
-               i = find(url, '?')
+               i = url.find('?')
                 if i >= 0:
                         url, query = url[:i], url[i+1:]
         if scheme in uses_params:
-               i = find(url, ';')
+               i = url.find(';')
                 if i >= 0:
                         url, params = url[:i], url[i+1:]
         tuple = scheme, netloc, url, params, query, fragment
@@ -151,7 +149,7 @@ def urljoin(base, url, allow_fragments = 1):
         if not path:
                 return urlunparse((scheme, netloc, bpath,
                                    params, query or bquery, fragment))
-       segments = split(bpath, '/')[:-1] + split(path, '/')
+       segments = bpath.split('/')[:-1] + path.split('/')
         # XXX The stuff below is bogus in various ways...
         if segments[-1] == '.':
                 segments[-1] = ''
@@ -171,7 +169,7 @@ def urljoin(base, url, allow_fragments = 1):
                 segments[-1] = ''
         elif len(segments) >= 2 and segments[-1] == '..':
                 segments[-2:] = ['']
-       return urlunparse((scheme, netloc, join(segments, '/'),
+       return urlunparse((scheme, netloc, '/'.join(segments),
                            params, query, fragment))
  
  def urldefrag(url):
@@ -236,7 +234,7 @@ def test():
         while 1:
                 line = fp.readline()
                 if not line: break
-               words = string.split(line)
+               words = line.split()
                 if not words:
                         continue
                 url = words[0]
author	Guido van Rossum <guido@python.org>
	Tue, 19 Dec 2000 16:48:13 +0000 (16:48 +0000)
committer	Guido van Rossum <guido@python.org>
	Tue, 19 Dec 2000 16:48:13 +0000 (16:48 +0000)