import time
import socket # For gethostbyaddr()
import string
-import rfc822
import mimetools
import SocketServer
host, port = self.socket.getsockname()
if not host or host == '0.0.0.0':
host = socket.gethostname()
- hostname, hostnames, hostaddrs = socket.gethostbyaddr(host)
- if '.' not in hostname:
- for host in hostnames:
- if '.' in host:
- hostname = host
- break
+ try:
+ hostname, hostnames, hostaddrs = socket.gethostbyaddr(host)
+ except socket.error:
+ hostname = host
+ else:
+ if '.' not in hostname:
+ for host in hostnames:
+ if '.' in host:
+ hostname = host
+ break
self.server_name = hostname
self.server_port = port
This server parses the request and the headers, and then calls a
function specific to the request type (<command>). Specifically,
- a request SPAM will be handled by a method handle_SPAM(). If no
+ a request SPAM will be handled by a method do_SPAM(). If no
such method exists the server sends an error response to the
client. If it exists, it is called with no arguments:
# where each string is of the form name[/version].
server_version = "BaseHTTP/" + __version__
- def handle(self):
- """Handle a single HTTP request.
+ def parse_request(self):
+ """Parse a request (internal).
- You normally don't need to override this method; see the class
- __doc__ string for information on how to handle specific HTTP
- commands such as GET and POST.
+ The request should be stored in self.raw_requestline; the results
+ are in self.command, self.path, self.request_version and
+ self.headers.
- """
+ Return value is 1 for success, 0 for failure; on failure, an
+ error is sent back.
- self.raw_requestline = self.rfile.readline()
+ """
self.request_version = version = "HTTP/0.9" # Default
requestline = self.raw_requestline
if requestline[-2:] == '\r\n':
[command, path, version] = words
if version[:5] != 'HTTP/':
self.send_error(400, "Bad request version (%s)" % `version`)
- return
+ return 0
elif len(words) == 2:
[command, path] = words
if command != 'GET':
self.send_error(400,
"Bad HTTP/0.9 request type (%s)" % `command`)
- return
+ return 0
else:
self.send_error(400, "Bad request syntax (%s)" % `requestline`)
- return
+ return 0
self.command, self.path, self.request_version = command, path, version
self.headers = self.MessageClass(self.rfile, 0)
- mname = 'do_' + command
+ return 1
+
+ def handle(self):
+ """Handle a single HTTP request.
+
+ You normally don't need to override this method; see the class
+ __doc__ string for information on how to handle specific HTTP
+ commands such as GET and POST.
+
+ """
+
+ self.raw_requestline = self.rfile.readline()
+ if not self.parse_request(): # An error code has been sent, just exit
+ return
+ mname = 'do_' + self.command
if not hasattr(self, mname):
- self.send_error(501, "Unsupported method (%s)" % `command`)
+ self.send_error(501, "Unsupported method (%s)" % `self.command`)
return
method = getattr(self, mname)
method()
This module builds on SimpleHTTPServer by implementing GET and POST
requests to cgi-bin scripts.
+If the os.fork() function is not present, this module will not work;
+SystemError will be raised instead.
+
"""
import os
-import sys
-import time
-import socket
import string
import urllib
import BaseHTTPServer
import SimpleHTTPServer
+try:
+ os.fork
+except AttributeError:
+ raise SystemError, __name__ + " requires os.fork()"
+
+
class CGIHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
"""Complete HTTP server with GET, HEAD and POST commands.
ua = self.headers.getheader('user-agent')
if ua:
env['HTTP_USER_AGENT'] = ua
+ co = filter(None, self.headers.getheaders('cookie'))
+ if co:
+ env['HTTP_COOKIE'] = string.join(co, ', ')
# XXX Other HTTP_* headers
decoded_query = string.replace(query, '+', ' ')
try:
import pwd
try:
nobody = pwd.getpwnam('nobody')[2]
- except pwd.error:
+ except KeyError:
nobody = 1 + max(map(lambda x: x[2], pwd.getpwall()))
return nobody
sections()
return all the configuration section names, sans DEFAULT
+ has_section(section)
+ return whether the given section exists
+
options(section)
return list of configuration options for the named section
+ has_option(section, option)
+ return whether the given section has the given option
+
read(filenames)
- read and parse the list of named configuration files
+ read and parse the list of named configuration files, given by
+ name. A single filename is also allowed. Non-existing files
+ are ignored.
+
+ readfp(fp, filename=None)
+ read and parse one configuration file, given as a file object.
+ The filename defaults to fp.name; it is only used in error
+ messages (if fp has no `name' attribute, the string `<???>' is used).
get(section, option, raw=0, vars=None)
return a string value for the named option. All % interpolations are
return self.__sections.has_key(section)
def options(self, section):
+ """Return a list of option names for the given section name."""
try:
opts = self.__sections[section].copy()
except KeyError:
opts.update(self.__defaults)
return opts.keys()
+ def has_option(self, section, option):
+ """Return whether the given section has the given option."""
+ try:
+ opts = self.__sections[section]
+ except KeyError:
+ raise NoSectionError(section)
+ return opts.has_key(option)
+
def read(self, filenames):
- """Read and parse a list of filenames."""
+ """Read and parse a filename or a list of filenames.
+
+ Files that cannot be opened are silently ignored; this is
+ designed so that you can specify a list of potential
+ configuration file locations (e.g. current directory, user's
+ home directory, systemwide directory), and all existing
+ configuration files in the list will be read. A single
+ filename may also be given.
+ """
if type(filenames) is type(''):
filenames = [filenames]
- for file in filenames:
+ for filename in filenames:
try:
- fp = open(file, 'r')
- self.__read(fp)
+ fp = open(filename)
except IOError:
- pass
+ continue
+ self.__read(fp, filename)
+ fp.close()
+
+ def readfp(self, fp, filename=None):
+ """Like read() but the argument must be a file-like object.
+
+ The `fp' argument must have a `readline' method. Optional
+ second argument is the `filename', which if not given, is
+ taken from fp.name. If fp has no `name' attribute, `<???>' is
+ used.
+
+ """
+ if filename is None:
+ try:
+ filename = fp.name
+ except AttributeError:
+ filename = '<???>'
+ self.__read(fp, filename)
def get(self, section, option, raw=0, vars=None):
"""Get an option value for a given section.
# Update with the entry specific variables
if vars:
d.update(vars)
- option = string.lower(option)
+ option = self.optionxform(option)
try:
rawval = d[option]
except KeyError:
depth = 0
while depth < 10: # Loop through this until it's done
depth = depth + 1
- if not string.find(value, "%("):
+ if string.find(value, "%(") >= 0:
try:
value = value % d
except KeyError, key:
raise ValueError, 'Not a boolean: %s' % v
return val
+ def optionxform(self, optionstr):
+ return string.lower(optionstr)
+
#
# Regular expressions for parsing section headers and options. Note a
# slight semantic change from the previous version, because of the use
# of \w, _ is allowed in section header names.
- __SECTCRE = re.compile(
+ SECTCRE = re.compile(
r'\[' # [
- r'(?P<header>[-\w]+)' # `-', `_' or any alphanum
+ r'(?P<header>[-\w_.*,(){}]+)' # a lot of stuff found by IvL
r'\]' # ]
)
- __OPTCRE = re.compile(
- r'(?P<option>[-.\w]+)' # - . _ alphanum
- r'[ \t]*[:=][ \t]*' # any number of space/tab,
+ OPTCRE = re.compile(
+ r'(?P<option>[-\w_.*,(){}]+)' # a lot of stuff found by IvL
+ r'[ \t]*(?P<vi>[:=])[ \t]*' # any number of space/tab,
# followed by separator
# (either : or =), followed
# by any # space/tab
r'(?P<value>.*)$' # everything up to eol
)
- def __read(self, fp):
+ def __read(self, fp, fpname):
"""Parse a sectioned setup file.
The sections in setup file contains a title line at the top,
if string.strip(line) == '' or line[0] in '#;':
continue
if string.lower(string.split(line)[0]) == 'rem' \
- and line[0] == "r": # no leading whitespace
+ and line[0] in "rR": # no leading whitespace
continue
# continuation line?
if line[0] in ' \t' and cursect is not None and optname:
# a section header or option header?
else:
# is it a section header?
- mo = self.__SECTCRE.match(line)
+ mo = self.SECTCRE.match(line)
if mo:
sectname = mo.group('header')
if self.__sections.has_key(sectname):
optname = None
# no section header in the file?
elif cursect is None:
- raise MissingSectionHeaderError(fp.name, lineno, `line`)
+ raise MissingSectionHeaderError(fpname, lineno, `line`)
# an option line?
else:
- mo = self.__OPTCRE.match(line)
+ mo = self.OPTCRE.match(line)
if mo:
- optname, optval = mo.group('option', 'value')
+ optname, vi, optval = mo.group('option', 'vi', 'value')
optname = string.lower(optname)
+ if vi in ('=', ':') and ';' in optval:
+ # ';' is a comment delimiter only if it follows
+ # a spacing character
+ pos = string.find(optval, ';')
+ if pos and optval[pos-1] in string.whitespace:
+ optval = optval[:pos]
optval = string.strip(optval)
# allow empty values
if optval == '""':
# raised at the end of the file and will contain a
# list of all bogus lines
if not e:
- e = ParsingError(fp.name)
+ e = ParsingError(fpname)
e.append(lineno, `line`)
# if any parsing errors occurred, raise an exception
if e:
| |
| +-- IOError
| +-- OSError(*)
+ | |
+ | +-- WindowsError(*)
|
+-- EOFError
+-- RuntimeError
| +-- NotImplementedError(*)
|
+-- NameError
+ | |
+ | +-- UnboundLocalError(*)
+ |
+-- AttributeError
+-- SyntaxError
+-- TypeError
| +-- FloatingPointError
|
+-- ValueError
+ | |
+ | +-- UnicodeError(*)
+ |
+-- SystemError
+-- MemoryError
"""
"""OS system call failed."""
pass
+class WindowsError(OSError):
+ """MS-Windows OS system call failed."""
+ pass
+
class RuntimeError(StandardError):
"""Unspecified run-time error."""
pass
pass
class NameError(StandardError):
- """Name not found locally or globally."""
+ """Name not found globally."""
+ pass
+
+class UnboundLocalError(NameError):
+ """Local name referenced but not bound to a value."""
+ pass
+
+class UnicodeError(ValueError):
+ """Unicode related error."""
pass
class MemoryError(StandardError):
"""
-import sys, os
+import sys, os, stat
_state = None
-def input(files=(), inplace=0, backup=""):
+def input(files=None, inplace=0, backup=""):
global _state
if _state and _state._file:
raise RuntimeError, "input() already active"
class FileInput:
- def __init__(self, files=(), inplace=0, backup=""):
+ def __init__(self, files=None, inplace=0, backup=""):
if type(files) == type(''):
files = (files,)
else:
- files = tuple(files)
+ if files is None:
+ files = sys.argv[1:]
if not files:
- files = tuple(sys.argv[1:])
- if not files:
- files = ('-',)
+ files = ('-',)
+ else:
+ files = tuple(files)
self._files = files
self._inplace = inplace
self._backup = backup
self._filename + (self._backup or ".bak"))
try: os.unlink(self._backupfilename)
except os.error: pass
- # The next three lines may raise IOError
+ # The next few lines may raise IOError
os.rename(self._filename, self._backupfilename)
self._file = open(self._backupfilename, "r")
- self._output = open(self._filename, "w")
+ try:
+ perm = os.fstat(self._file.fileno())[stat.ST_MODE]
+ except:
+ self._output = open(self._filename, "w")
+ else:
+ fd = os.open(self._filename,
+ os.O_CREAT | os.O_WRONLY | os.O_TRUNC,
+ perm)
+ self._output = os.fdopen(fd, "w")
+ try:
+ os.chmod(self._filename, perm)
+ except:
+ pass
self._savestdout = sys.stdout
sys.stdout = self._output
else:
+"""Generic output formatting.
+
+Formatter objects transform an abstract flow of formatting events into
+specific output events on writer objects. Formatters manage several stack
+structures to allow various properties of a writer object to be changed and
+restored; writers need not be able to handle relative changes nor any sort
+of ``change back'' operation. Specific writer properties which may be
+controlled via formatter objects are horizontal alignment, font, and left
+margin indentations. A mechanism is provided which supports providing
+arbitrary, non-exclusive style settings to a writer as well. Additional
+interfaces facilitate formatting events which are not reversible, such as
+paragraph separation.
+
+Writer objects encapsulate device interfaces. Abstract devices, such as
+file formats, are supported as well as physical devices. The provided
+implementations all work with abstract devices. The interface makes
+available mechanisms for setting the properties which formatter objects
+manage and inserting data into the output.
+"""
+
import string
import sys
from types import StringType
-# Gopher protocol client interface
+"""Gopher protocol client interface."""
import string
A_WHOIS = 'w'
A_QUERY = 'q'
A_GIF = 'g'
-A_HTML = 'h' # HTML file
-A_WWW = 'w' # WWW address
+A_HTML = 'h' # HTML file
+A_WWW = 'w' # WWW address
A_PLUS_IMAGE = ':'
A_PLUS_MOVIE = ';'
A_PLUS_SOUND = '<'
-# Function mapping all file types to strings; unknown types become TYPE='x'
_names = dir()
_type_to_name_map = {}
def type_to_name(gtype):
- global _type_to_name_map
- if _type_to_name_map=={}:
- for name in _names:
- if name[:2] == 'A_':
- _type_to_name_map[eval(name)] = name[2:]
- if _type_to_name_map.has_key(gtype):
- return _type_to_name_map[gtype]
- return 'TYPE=' + `gtype`
+ """Map all file types to strings; unknown types become TYPE='x'."""
+ global _type_to_name_map
+ if _type_to_name_map=={}:
+ for name in _names:
+ if name[:2] == 'A_':
+ _type_to_name_map[eval(name)] = name[2:]
+ if _type_to_name_map.has_key(gtype):
+ return _type_to_name_map[gtype]
+ return 'TYPE=' + `gtype`
# Names for characters and strings
CRLF = '\r\n'
TAB = '\t'
-# Send a selector to a given host and port, return a file with the reply
def send_selector(selector, host, port = 0):
- import socket
- import string
- if not port:
- i = string.find(host, ':')
- if i >= 0:
- host, port = host[:i], string.atoi(host[i+1:])
- if not port:
- port = DEF_PORT
- elif type(port) == type(''):
- port = string.atoi(port)
- s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
- s.connect(host, port)
- s.send(selector + CRLF)
- s.shutdown(1)
- return s.makefile('rb')
-
-# Send a selector and a query string
+ """Send a selector to a given host and port, return a file with the reply."""
+ import socket
+ import string
+ if not port:
+ i = string.find(host, ':')
+ if i >= 0:
+ host, port = host[:i], string.atoi(host[i+1:])
+ if not port:
+ port = DEF_PORT
+ elif type(port) == type(''):
+ port = string.atoi(port)
+ s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ s.connect((host, port))
+ s.send(selector + CRLF)
+ s.shutdown(1)
+ return s.makefile('rb')
+
def send_query(selector, query, host, port = 0):
- return send_selector(selector + '\t' + query, host, port)
+ """Send a selector and a query string."""
+ return send_selector(selector + '\t' + query, host, port)
-# Takes a path as returned by urlparse and returns the appropriate selector
def path_to_selector(path):
- if path=="/":
- return "/"
- else:
- return path[2:] # Cuts initial slash and data type identifier
+ """Takes a path as returned by urlparse and returns the appropriate selector."""
+ if path=="/":
+ return "/"
+ else:
+ return path[2:] # Cuts initial slash and data type identifier
-# Takes a path as returned by urlparse and maps it to a string
-# See section 3.4 of RFC 1738 for details
def path_to_datatype_name(path):
- if path=="/":
- # No way to tell, although "INDEX" is likely
- return "TYPE='unknown'"
- else:
- return type_to_name(path[1])
+ """Takes a path as returned by urlparse and maps it to a string.
+ See section 3.4 of RFC 1738 for details."""
+ if path=="/":
+ # No way to tell, although "INDEX" is likely
+ return "TYPE='unknown'"
+ else:
+ return type_to_name(path[1])
# The following functions interpret the data returned by the gopher
# server according to the expected type, e.g. textfile or directory
-# Get a directory in the form of a list of entries
def get_directory(f):
- import string
- list = []
- while 1:
- line = f.readline()
- if not line:
- print '(Unexpected EOF from server)'
- break
- if line[-2:] == CRLF:
- line = line[:-2]
- elif line[-1:] in CRLF:
- line = line[:-1]
- if line == '.':
- break
- if not line:
- print '(Empty line from server)'
- continue
- gtype = line[0]
- parts = string.splitfields(line[1:], TAB)
- if len(parts) < 4:
- print '(Bad line from server:', `line`, ')'
- continue
- if len(parts) > 4:
- if parts[4:] != ['+']:
- print '(Extra info from server:',
- print parts[4:], ')'
- else:
- parts.append('')
- parts.insert(0, gtype)
- list.append(parts)
- return list
-
-# Get a text file as a list of lines, with trailing CRLF stripped
+ """Get a directory in the form of a list of entries."""
+ import string
+ list = []
+ while 1:
+ line = f.readline()
+ if not line:
+ print '(Unexpected EOF from server)'
+ break
+ if line[-2:] == CRLF:
+ line = line[:-2]
+ elif line[-1:] in CRLF:
+ line = line[:-1]
+ if line == '.':
+ break
+ if not line:
+ print '(Empty line from server)'
+ continue
+ gtype = line[0]
+ parts = string.splitfields(line[1:], TAB)
+ if len(parts) < 4:
+ print '(Bad line from server:', `line`, ')'
+ continue
+ if len(parts) > 4:
+ if parts[4:] != ['+']:
+ print '(Extra info from server:',
+ print parts[4:], ')'
+ else:
+ parts.append('')
+ parts.insert(0, gtype)
+ list.append(parts)
+ return list
+
def get_textfile(f):
- list = []
- get_alt_textfile(f, list.append)
- return list
+ """Get a text file as a list of lines, with trailing CRLF stripped."""
+ list = []
+ get_alt_textfile(f, list.append)
+ return list
-# Get a text file and pass each line to a function, with trailing CRLF stripped
def get_alt_textfile(f, func):
- while 1:
- line = f.readline()
- if not line:
- print '(Unexpected EOF from server)'
- break
- if line[-2:] == CRLF:
- line = line[:-2]
- elif line[-1:] in CRLF:
- line = line[:-1]
- if line == '.':
- break
- if line[:2] == '..':
- line = line[1:]
- func(line)
-
-# Get a binary file as one solid data block
+ """Get a text file and pass each line to a function, with trailing CRLF stripped."""
+ while 1:
+ line = f.readline()
+ if not line:
+ print '(Unexpected EOF from server)'
+ break
+ if line[-2:] == CRLF:
+ line = line[:-2]
+ elif line[-1:] in CRLF:
+ line = line[:-1]
+ if line == '.':
+ break
+ if line[:2] == '..':
+ line = line[1:]
+ func(line)
+
def get_binary(f):
- data = f.read()
- return data
+ """Get a binary file as one solid data block."""
+ data = f.read()
+ return data
-# Get a binary file and pass each block to a function
def get_alt_binary(f, func, blocksize):
- while 1:
- data = f.read(blocksize)
- if not data:
- break
- func(data)
+ """Get a binary file and pass each block to a function."""
+ while 1:
+ data = f.read(blocksize)
+ if not data:
+ break
+ func(data)
-# Trivial test program
def test():
- import sys
- import getopt
- opts, args = getopt.getopt(sys.argv[1:], '')
- selector = DEF_SELECTOR
- type = selector[0]
- host = DEF_HOST
- port = DEF_PORT
- if args:
- host = args[0]
- args = args[1:]
- if args:
- type = args[0]
- args = args[1:]
- if len(type) > 1:
- type, selector = type[0], type
- else:
- selector = ''
- if args:
- selector = args[0]
- args = args[1:]
- query = ''
- if args:
- query = args[0]
- args = args[1:]
- if type == A_INDEX:
- f = send_query(selector, query, host)
- else:
- f = send_selector(selector, host)
- if type == A_TEXT:
- list = get_textfile(f)
- for item in list: print item
- elif type in (A_MENU, A_INDEX):
- list = get_directory(f)
- for item in list: print item
- else:
- data = get_binary(f)
- print 'binary data:', len(data), 'bytes:', `data[:100]`[:40]
+ """Trivial test program.
+
+ Optional positional command line arguments start with the host and
+ the type.  A type argument longer than one character is taken as a
+ complete selector whose first character is the type.  Further
+ arguments can supply a selector and a query.  The reply is printed
+ item by item for text files, menus and indexes; for anything else
+ the byte count and the start of the data are printed.
+ """
+ import sys
+ import getopt
+ opts, args = getopt.getopt(sys.argv[1:], '')
+ selector = DEF_SELECTOR
+ type = selector[0]
+ host = DEF_HOST
+ # NOTE(review): port is assigned but never passed to the calls below
+ port = DEF_PORT
+ if args:
+ host = args[0]
+ args = args[1:]
+ if args:
+ type = args[0]
+ args = args[1:]
+ if len(type) > 1:
+ type, selector = type[0], type
+ else:
+ selector = ''
+ if args:
+ selector = args[0]
+ args = args[1:]
+ query = ''
+ if args:
+ query = args[0]
+ args = args[1:]
+ # Only an index search carries a query string
+ if type == A_INDEX:
+ f = send_query(selector, query, host)
+ else:
+ f = send_selector(selector, host)
+ if type == A_TEXT:
+ list = get_textfile(f)
+ for item in list: print item
+ elif type in (A_MENU, A_INDEX):
+ list = get_directory(f)
+ for item in list: print item
+ else:
+ data = get_binary(f)
+ # Show at most the first 40 characters of the repr of the data
+ print 'binary data:', len(data), 'bytes:', `data[:100]`[:40]
# Run the test when run as script
if __name__ == '__main__':
- test()
+ test()
-# Proposed entity definitions for HTML, taken from
-# http://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_14.html
+"""HTML character entity references."""
entitydefs = {
- 'lt': '<',
- 'gt': '>',
- 'amp': '&',
- 'quot': '"',
- 'nbsp': chr(160), # no-break space
- 'iexcl': chr(161), # inverted exclamation mark
- 'cent': chr(162), # cent sign
- 'pound': chr(163), # pound sterling sign
- 'curren': chr(164), # general currency sign
- 'yen': chr(165), # yen sign
- 'brvbar': chr(166), # broken (vertical) bar
- 'sect': chr(167), # section sign
- 'uml': chr(168), # umlaut (dieresis)
- 'copy': chr(169), # copyright sign
- 'ordf': chr(170), # ordinal indicator, feminine
- 'laquo': chr(171), # angle quotation mark, left
- 'not': chr(172), # not sign
- 'shy': chr(173), # soft hyphen
- 'reg': chr(174), # registered sign
- 'macr': chr(175), # macron
- 'deg': chr(176), # degree sign
- 'plusmn': chr(177), # plus-or-minus sign
- 'sup2': chr(178), # superscript two
- 'sup3': chr(179), # superscript three
- 'acute': chr(180), # acute accent
- 'micro': chr(181), # micro sign
- 'para': chr(182), # pilcrow (paragraph sign)
- 'middot': chr(183), # middle dot
- 'cedil': chr(184), # cedilla
- 'sup1': chr(185), # superscript one
- 'ordm': chr(186), # ordinal indicator, masculine
- 'raquo': chr(187), # angle quotation mark, right
- 'frac14': chr(188), # fraction one-quarter
- 'frac12': chr(189), # fraction one-half
- 'frac34': chr(190), # fraction three-quarters
- 'iquest': chr(191), # inverted question mark
- 'Agrave': chr(192), # capital A, grave accent
- 'Aacute': chr(193), # capital A, acute accent
- 'Acirc': chr(194), # capital A, circumflex accent
- 'Atilde': chr(195), # capital A, tilde
- 'Auml': chr(196), # capital A, dieresis or umlaut mark
- 'Aring': chr(197), # capital A, ring
- 'AElig': chr(198), # capital AE diphthong (ligature)
- 'Ccedil': chr(199), # capital C, cedilla
- 'Egrave': chr(200), # capital E, grave accent
- 'Eacute': chr(201), # capital E, acute accent
- 'Ecirc': chr(202), # capital E, circumflex accent
- 'Euml': chr(203), # capital E, dieresis or umlaut mark
- 'Igrave': chr(204), # capital I, grave accent
- 'Iacute': chr(205), # capital I, acute accent
- 'Icirc': chr(206), # capital I, circumflex accent
- 'Iuml': chr(207), # capital I, dieresis or umlaut mark
- 'ETH': chr(208), # capital Eth, Icelandic
- 'Ntilde': chr(209), # capital N, tilde
- 'Ograve': chr(210), # capital O, grave accent
- 'Oacute': chr(211), # capital O, acute accent
- 'Ocirc': chr(212), # capital O, circumflex accent
- 'Otilde': chr(213), # capital O, tilde
- 'Ouml': chr(214), # capital O, dieresis or umlaut mark
- 'times': chr(215), # multiply sign
- 'Oslash': chr(216), # capital O, slash
- 'Ugrave': chr(217), # capital U, grave accent
- 'Uacute': chr(218), # capital U, acute accent
- 'Ucirc': chr(219), # capital U, circumflex accent
- 'Uuml': chr(220), # capital U, dieresis or umlaut mark
- 'Yacute': chr(221), # capital Y, acute accent
- 'THORN': chr(222), # capital THORN, Icelandic
- 'szlig': chr(223), # small sharp s, German (sz ligature)
- 'agrave': chr(224), # small a, grave accent
- 'aacute': chr(225), # small a, acute accent
- 'acirc': chr(226), # small a, circumflex accent
- 'atilde': chr(227), # small a, tilde
- 'auml': chr(228), # small a, dieresis or umlaut mark
- 'aring': chr(229), # small a, ring
- 'aelig': chr(230), # small ae diphthong (ligature)
- 'ccedil': chr(231), # small c, cedilla
- 'egrave': chr(232), # small e, grave accent
- 'eacute': chr(233), # small e, acute accent
- 'ecirc': chr(234), # small e, circumflex accent
- 'euml': chr(235), # small e, dieresis or umlaut mark
- 'igrave': chr(236), # small i, grave accent
- 'iacute': chr(237), # small i, acute accent
- 'icirc': chr(238), # small i, circumflex accent
- 'iuml': chr(239), # small i, dieresis or umlaut mark
- 'eth': chr(240), # small eth, Icelandic
- 'ntilde': chr(241), # small n, tilde
- 'ograve': chr(242), # small o, grave accent
- 'oacute': chr(243), # small o, acute accent
- 'ocirc': chr(244), # small o, circumflex accent
- 'otilde': chr(245), # small o, tilde
- 'ouml': chr(246), # small o, dieresis or umlaut mark
- 'divide': chr(247), # divide sign
- 'oslash': chr(248), # small o, slash
- 'ugrave': chr(249), # small u, grave accent
- 'uacute': chr(250), # small u, acute accent
- 'ucirc': chr(251), # small u, circumflex accent
- 'uuml': chr(252), # small u, dieresis or umlaut mark
- 'yacute': chr(253), # small y, acute accent
- 'thorn': chr(254), # small thorn, Icelandic
- 'yuml': chr(255), # small y, dieresis or umlaut mark
+ 'AElig': '\306', # latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1
+ 'Aacute': '\301', # latin capital letter A with acute, U+00C1 ISOlat1
+ 'Acirc': '\302', # latin capital letter A with circumflex, U+00C2 ISOlat1
+ 'Agrave': '\300', # latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1
+ 'Alpha': 'Α', # greek capital letter alpha, U+0391
+ 'Aring': '\305', # latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1
+ 'Atilde': '\303', # latin capital letter A with tilde, U+00C3 ISOlat1
+ 'Auml': '\304', # latin capital letter A with diaeresis, U+00C4 ISOlat1
+ 'Beta': 'Β', # greek capital letter beta, U+0392
+ 'Ccedil': '\307', # latin capital letter C with cedilla, U+00C7 ISOlat1
+ 'Chi': 'Χ', # greek capital letter chi, U+03A7
+ 'Dagger': '‡', # double dagger, U+2021 ISOpub
+ 'Delta': 'Δ', # greek capital letter delta, U+0394 ISOgrk3
+ 'ETH': '\320', # latin capital letter ETH, U+00D0 ISOlat1
+ 'Eacute': '\311', # latin capital letter E with acute, U+00C9 ISOlat1
+ 'Ecirc': '\312', # latin capital letter E with circumflex, U+00CA ISOlat1
+ 'Egrave': '\310', # latin capital letter E with grave, U+00C8 ISOlat1
+ 'Epsilon': 'Ε', # greek capital letter epsilon, U+0395
+ 'Eta': 'Η', # greek capital letter eta, U+0397
+ 'Euml': '\313', # latin capital letter E with diaeresis, U+00CB ISOlat1
+ 'Gamma': 'Γ', # greek capital letter gamma, U+0393 ISOgrk3
+ 'Iacute': '\315', # latin capital letter I with acute, U+00CD ISOlat1
+ 'Icirc': '\316', # latin capital letter I with circumflex, U+00CE ISOlat1
+ 'Igrave': '\314', # latin capital letter I with grave, U+00CC ISOlat1
+ 'Iota': 'Ι', # greek capital letter iota, U+0399
+ 'Iuml': '\317', # latin capital letter I with diaeresis, U+00CF ISOlat1
+ 'Kappa': 'Κ', # greek capital letter kappa, U+039A
+ 'Lambda': 'Λ', # greek capital letter lambda, U+039B ISOgrk3
+ 'Mu': 'Μ', # greek capital letter mu, U+039C
+ 'Ntilde': '\321', # latin capital letter N with tilde, U+00D1 ISOlat1
+ 'Nu': 'Ν', # greek capital letter nu, U+039D
+ 'OElig': 'Œ', # latin capital ligature OE, U+0152 ISOlat2
+ 'Oacute': '\323', # latin capital letter O with acute, U+00D3 ISOlat1
+ 'Ocirc': '\324', # latin capital letter O with circumflex, U+00D4 ISOlat1
+ 'Ograve': '\322', # latin capital letter O with grave, U+00D2 ISOlat1
+ 'Omega': 'Ω', # greek capital letter omega, U+03A9 ISOgrk3
+ 'Omicron': 'Ο', # greek capital letter omicron, U+039F
+ 'Oslash': '\330', # latin capital letter O with stroke = latin capital letter O slash, U+00D8 ISOlat1
+ 'Otilde': '\325', # latin capital letter O with tilde, U+00D5 ISOlat1
+ 'Ouml': '\326', # latin capital letter O with diaeresis, U+00D6 ISOlat1
+ 'Phi': 'Φ', # greek capital letter phi, U+03A6 ISOgrk3
+ 'Pi': 'Π', # greek capital letter pi, U+03A0 ISOgrk3
+ 'Prime': '″', # double prime = seconds = inches, U+2033 ISOtech
+ 'Psi': 'Ψ', # greek capital letter psi, U+03A8 ISOgrk3
+ 'Rho': 'Ρ', # greek capital letter rho, U+03A1
+ 'Scaron': 'Š', # latin capital letter S with caron, U+0160 ISOlat2
+ 'Sigma': 'Σ', # greek capital letter sigma, U+03A3 ISOgrk3
+ 'THORN': '\336', # latin capital letter THORN, U+00DE ISOlat1
+ 'Tau': 'Τ', # greek capital letter tau, U+03A4
+ 'Theta': 'Θ', # greek capital letter theta, U+0398 ISOgrk3
+ 'Uacute': '\332', # latin capital letter U with acute, U+00DA ISOlat1
+ 'Ucirc': '\333', # latin capital letter U with circumflex, U+00DB ISOlat1
+ 'Ugrave': '\331', # latin capital letter U with grave, U+00D9 ISOlat1
+ 'Upsilon': 'Υ', # greek capital letter upsilon, U+03A5 ISOgrk3
+ 'Uuml': '\334', # latin capital letter U with diaeresis, U+00DC ISOlat1
+ 'Xi': 'Ξ', # greek capital letter xi, U+039E ISOgrk3
+ 'Yacute': '\335', # latin capital letter Y with acute, U+00DD ISOlat1
+ 'Yuml': 'Ÿ', # latin capital letter Y with diaeresis, U+0178 ISOlat2
+ 'Zeta': 'Ζ', # greek capital letter zeta, U+0396
+ 'aacute': '\341', # latin small letter a with acute, U+00E1 ISOlat1
+ 'acirc': '\342', # latin small letter a with circumflex, U+00E2 ISOlat1
+ 'acute': '\264', # acute accent = spacing acute, U+00B4 ISOdia
+ 'aelig': '\346', # latin small letter ae = latin small ligature ae, U+00E6 ISOlat1
+ 'agrave': '\340', # latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1
+ 'alefsym': 'ℵ', # alef symbol = first transfinite cardinal, U+2135 NEW
+ 'alpha': 'α', # greek small letter alpha, U+03B1 ISOgrk3
+ 'amp': '\46', # ampersand, U+0026 ISOnum
+ 'and': '∧', # logical and = wedge, U+2227 ISOtech
+ 'ang': '∠', # angle, U+2220 ISOamso
+ 'aring': '\345', # latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1
+ 'asymp': '≈', # almost equal to = asymptotic to, U+2248 ISOamsr
+ 'atilde': '\343', # latin small letter a with tilde, U+00E3 ISOlat1
+ 'auml': '\344', # latin small letter a with diaeresis, U+00E4 ISOlat1
+ 'bdquo': '„', # double low-9 quotation mark, U+201E NEW
+ 'beta': 'β', # greek small letter beta, U+03B2 ISOgrk3
+ 'brvbar': '\246', # broken bar = broken vertical bar, U+00A6 ISOnum
+ 'bull': '•', # bullet = black small circle, U+2022 ISOpub
+ 'cap': '∩', # intersection = cap, U+2229 ISOtech
+ 'ccedil': '\347', # latin small letter c with cedilla, U+00E7 ISOlat1
+ 'cedil': '\270', # cedilla = spacing cedilla, U+00B8 ISOdia
+ 'cent': '\242', # cent sign, U+00A2 ISOnum
+ 'chi': 'χ', # greek small letter chi, U+03C7 ISOgrk3
+ 'circ': 'ˆ', # modifier letter circumflex accent, U+02C6 ISOpub
+ 'clubs': '♣', # black club suit = shamrock, U+2663 ISOpub
+ 'cong': '≅', # approximately equal to, U+2245 ISOtech
+ 'copy': '\251', # copyright sign, U+00A9 ISOnum
+ 'crarr': '↵', # downwards arrow with corner leftwards = carriage return, U+21B5 NEW
+ 'cup': '∪', # union = cup, U+222A ISOtech
+ 'curren': '\244', # currency sign, U+00A4 ISOnum
+ 'dArr': '⇓', # downwards double arrow, U+21D3 ISOamsa
+ 'dagger': '†', # dagger, U+2020 ISOpub
+ 'darr': '↓', # downwards arrow, U+2193 ISOnum
+ 'deg': '\260', # degree sign, U+00B0 ISOnum
+ 'delta': 'δ', # greek small letter delta, U+03B4 ISOgrk3
+ 'diams': '♦', # black diamond suit, U+2666 ISOpub
+ 'divide': '\367', # division sign, U+00F7 ISOnum
+ 'eacute': '\351', # latin small letter e with acute, U+00E9 ISOlat1
+ 'ecirc': '\352', # latin small letter e with circumflex, U+00EA ISOlat1
+ 'egrave': '\350', # latin small letter e with grave, U+00E8 ISOlat1
+ 'empty': '∅', # empty set = null set = diameter, U+2205 ISOamso
+ 'emsp': ' ', # em space, U+2003 ISOpub
+ 'ensp': ' ', # en space, U+2002 ISOpub
+ 'epsilon': 'ε', # greek small letter epsilon, U+03B5 ISOgrk3
+ 'equiv': '≡', # identical to, U+2261 ISOtech
+ 'eta': 'η', # greek small letter eta, U+03B7 ISOgrk3
+ 'eth': '\360', # latin small letter eth, U+00F0 ISOlat1
+ 'euml': '\353', # latin small letter e with diaeresis, U+00EB ISOlat1
+ 'euro': '€', # euro sign, U+20AC NEW
+ 'exist': '∃', # there exists, U+2203 ISOtech
+ 'fnof': 'ƒ', # latin small f with hook = function = florin, U+0192 ISOtech
+ 'forall': '∀', # for all, U+2200 ISOtech
+ 'frac12': '\275', # vulgar fraction one half = fraction one half, U+00BD ISOnum
+ 'frac14': '\274', # vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum
+ 'frac34': '\276', # vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum
+ 'frasl': '⁄', # fraction slash, U+2044 NEW
+ 'gamma': 'γ', # greek small letter gamma, U+03B3 ISOgrk3
+ 'ge': '≥', # greater-than or equal to, U+2265 ISOtech
+ 'gt': '\76', # greater-than sign, U+003E ISOnum
+ 'hArr': '⇔', # left right double arrow, U+21D4 ISOamsa
+ 'harr': '↔', # left right arrow, U+2194 ISOamsa
+ 'hearts': '♥', # black heart suit = valentine, U+2665 ISOpub
+ 'hellip': '…', # horizontal ellipsis = three dot leader, U+2026 ISOpub
+ 'iacute': '\355', # latin small letter i with acute, U+00ED ISOlat1
+ 'icirc': '\356', # latin small letter i with circumflex, U+00EE ISOlat1
+ 'iexcl': '\241', # inverted exclamation mark, U+00A1 ISOnum
+ 'igrave': '\354', # latin small letter i with grave, U+00EC ISOlat1
+ 'image': 'ℑ', # blackletter capital I = imaginary part, U+2111 ISOamso
+ 'infin': '∞', # infinity, U+221E ISOtech
+ 'int': '∫', # integral, U+222B ISOtech
+ 'iota': 'ι', # greek small letter iota, U+03B9 ISOgrk3
+ 'iquest': '\277', # inverted question mark = turned question mark, U+00BF ISOnum
+ 'isin': '∈', # element of, U+2208 ISOtech
+ 'iuml': '\357', # latin small letter i with diaeresis, U+00EF ISOlat1
+ 'kappa': 'κ', # greek small letter kappa, U+03BA ISOgrk3
+ 'lArr': '⇐', # leftwards double arrow, U+21D0 ISOtech
+ 'lambda': 'λ', # greek small letter lambda, U+03BB ISOgrk3
+ 'lang': '〈', # left-pointing angle bracket = bra, U+2329 ISOtech
+ 'laquo': '\253', # left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum
+ 'larr': '←', # leftwards arrow, U+2190 ISOnum
+ 'lceil': '⌈', # left ceiling = apl upstile, U+2308 ISOamsc
+ 'ldquo': '“', # left double quotation mark, U+201C ISOnum
+ 'le': '≤', # less-than or equal to, U+2264 ISOtech
+ 'lfloor': '⌊', # left floor = apl downstile, U+230A ISOamsc
+ 'lowast': '∗', # asterisk operator, U+2217 ISOtech
+ 'loz': '◊', # lozenge, U+25CA ISOpub
+ 'lrm': '‎', # left-to-right mark, U+200E NEW RFC 2070
+ 'lsaquo': '‹', # single left-pointing angle quotation mark, U+2039 ISO proposed
+ 'lsquo': '‘', # left single quotation mark, U+2018 ISOnum
+ 'lt': '\74', # less-than sign, U+003C ISOnum
+ 'macr': '\257', # macron = spacing macron = overline = APL overbar, U+00AF ISOdia
+ 'mdash': '—', # em dash, U+2014 ISOpub
+ 'micro': '\265', # micro sign, U+00B5 ISOnum
+ 'middot': '\267', # middle dot = Georgian comma = Greek middle dot, U+00B7 ISOnum
+ 'minus': '−', # minus sign, U+2212 ISOtech
+ 'mu': 'μ', # greek small letter mu, U+03BC ISOgrk3
+ 'nabla': '∇', # nabla = backward difference, U+2207 ISOtech
+ 'nbsp': '\240', # no-break space = non-breaking space, U+00A0 ISOnum
+ 'ndash': '–', # en dash, U+2013 ISOpub
+ 'ne': '≠', # not equal to, U+2260 ISOtech
+ 'ni': '∋', # contains as member, U+220B ISOtech
+ 'not': '\254', # not sign, U+00AC ISOnum
+ 'notin': '∉', # not an element of, U+2209 ISOtech
+ 'nsub': '⊄', # not a subset of, U+2284 ISOamsn
+ 'ntilde': '\361', # latin small letter n with tilde, U+00F1 ISOlat1
+ 'nu': 'ν', # greek small letter nu, U+03BD ISOgrk3
+ 'oacute': '\363', # latin small letter o with acute, U+00F3 ISOlat1
+ 'ocirc': '\364', # latin small letter o with circumflex, U+00F4 ISOlat1
+ 'oelig': 'œ', # latin small ligature oe, U+0153 ISOlat2
+ 'ograve': '\362', # latin small letter o with grave, U+00F2 ISOlat1
+ 'oline': '‾', # overline = spacing overscore, U+203E NEW
+ 'omega': 'ω', # greek small letter omega, U+03C9 ISOgrk3
+ 'omicron': 'ο', # greek small letter omicron, U+03BF NEW
+ 'oplus': '⊕', # circled plus = direct sum, U+2295 ISOamsb
+ 'or': '∨', # logical or = vee, U+2228 ISOtech
+ 'ordf': '\252', # feminine ordinal indicator, U+00AA ISOnum
+ 'ordm': '\272', # masculine ordinal indicator, U+00BA ISOnum
+ 'oslash': '\370', # latin small letter o with stroke, = latin small letter o slash, U+00F8 ISOlat1
+ 'otilde': '\365', # latin small letter o with tilde, U+00F5 ISOlat1
+ 'otimes': '⊗', # circled times = vector product, U+2297 ISOamsb
+ 'ouml': '\366', # latin small letter o with diaeresis, U+00F6 ISOlat1
+ 'para': '\266', # pilcrow sign = paragraph sign, U+00B6 ISOnum
+ 'part': '∂', # partial differential, U+2202 ISOtech
+ 'permil': '‰', # per mille sign, U+2030 ISOtech
+ 'perp': '⊥', # up tack = orthogonal to = perpendicular, U+22A5 ISOtech
+ 'phi': 'φ', # greek small letter phi, U+03C6 ISOgrk3
+ 'pi': 'π', # greek small letter pi, U+03C0 ISOgrk3
+ 'piv': 'ϖ', # greek pi symbol, U+03D6 ISOgrk3
+ 'plusmn': '\261', # plus-minus sign = plus-or-minus sign, U+00B1 ISOnum
+ 'pound': '\243', # pound sign, U+00A3 ISOnum
+ 'prime': '′', # prime = minutes = feet, U+2032 ISOtech
+ 'prod': '∏', # n-ary product = product sign, U+220F ISOamsb
+ 'prop': '∝', # proportional to, U+221D ISOtech
+ 'psi': 'ψ', # greek small letter psi, U+03C8 ISOgrk3
+ 'quot': '\42', # quotation mark = APL quote, U+0022 ISOnum
+ 'rArr': '⇒', # rightwards double arrow, U+21D2 ISOtech
+ 'radic': '√', # square root = radical sign, U+221A ISOtech
+ 'rang': '〉', # right-pointing angle bracket = ket, U+232A ISOtech
+ 'raquo': '\273', # right-pointing double angle quotation mark = right pointing guillemet, U+00BB ISOnum
+ 'rarr': '→', # rightwards arrow, U+2192 ISOnum
+ 'rceil': '⌉', # right ceiling, U+2309 ISOamsc
+ 'rdquo': '”', # right double quotation mark, U+201D ISOnum
+ 'real': 'ℜ', # blackletter capital R = real part symbol, U+211C ISOamso
+ 'reg': '\256', # registered sign = registered trade mark sign, U+00AE ISOnum
+ 'rfloor': '⌋', # right floor, U+230B ISOamsc
+ 'rho': 'ρ', # greek small letter rho, U+03C1 ISOgrk3
+ 'rlm': '‏', # right-to-left mark, U+200F NEW RFC 2070
+ 'rsaquo': '›', # single right-pointing angle quotation mark, U+203A ISO proposed
+ 'rsquo': '’', # right single quotation mark, U+2019 ISOnum
+ 'sbquo': '‚', # single low-9 quotation mark, U+201A NEW
+ 'scaron': 'š', # latin small letter s with caron, U+0161 ISOlat2
+ 'sdot': '⋅', # dot operator, U+22C5 ISOamsb
+ 'sect': '\247', # section sign, U+00A7 ISOnum
+ 'shy': '\255', # soft hyphen = discretionary hyphen, U+00AD ISOnum
+ 'sigma': 'σ', # greek small letter sigma, U+03C3 ISOgrk3
+ 'sigmaf': 'ς', # greek small letter final sigma, U+03C2 ISOgrk3
+ 'sim': '∼', # tilde operator = varies with = similar to, U+223C ISOtech
+ 'spades': '♠', # black spade suit, U+2660 ISOpub
+ 'sub': '⊂', # subset of, U+2282 ISOtech
+ 'sube': '⊆', # subset of or equal to, U+2286 ISOtech
+ 'sum': '∑', # n-ary summation, U+2211 ISOamsb
+ 'sup': '⊃', # superset of, U+2283 ISOtech
+ 'sup1': '\271', # superscript one = superscript digit one, U+00B9 ISOnum
+ 'sup2': '\262', # superscript two = superscript digit two = squared, U+00B2 ISOnum
+ 'sup3': '\263', # superscript three = superscript digit three = cubed, U+00B3 ISOnum
+ 'supe': '⊇', # superset of or equal to, U+2287 ISOtech
+ 'szlig': '\337', # latin small letter sharp s = ess-zed, U+00DF ISOlat1
+ 'tau': 'τ', # greek small letter tau, U+03C4 ISOgrk3
+ 'there4': '∴', # therefore, U+2234 ISOtech
+ 'theta': 'θ', # greek small letter theta, U+03B8 ISOgrk3
+ 'thetasym': 'ϑ', # greek small letter theta symbol, U+03D1 NEW
+ 'thinsp': ' ', # thin space, U+2009 ISOpub
+ 'thorn': '\376', # latin small letter thorn, U+00FE ISOlat1
+ 'tilde': '˜', # small tilde, U+02DC ISOdia
+ 'times': '\327', # multiplication sign, U+00D7 ISOnum
+ 'trade': '™', # trade mark sign, U+2122 ISOnum
+ 'uArr': '⇑', # upwards double arrow, U+21D1 ISOamsa
+ 'uacute': '\372', # latin small letter u with acute, U+00FA ISOlat1
+ 'uarr': '↑', # upwards arrow, U+2191 ISOnum
+ 'ucirc': '\373', # latin small letter u with circumflex, U+00FB ISOlat1
+ 'ugrave': '\371', # latin small letter u with grave, U+00F9 ISOlat1
+ 'uml': '\250', # diaeresis = spacing diaeresis, U+00A8 ISOdia
+ 'upsih': 'ϒ', # greek upsilon with hook symbol, U+03D2 NEW
+ 'upsilon': 'υ', # greek small letter upsilon, U+03C5 ISOgrk3
+ 'uuml': '\374', # latin small letter u with diaeresis, U+00FC ISOlat1
+ 'weierp': '℘', # script capital P = power set = Weierstrass p, U+2118 ISOamso
+ 'xi': 'ξ', # greek small letter xi, U+03BE ISOgrk3
+ 'yacute': '\375', # latin small letter y with acute, U+00FD ISOlat1
+ 'yen': '\245', # yen sign = yuan sign, U+00A5 ISOnum
+ 'yuml': '\377', # latin small letter y with diaeresis, U+00FF ISOlat1
+ 'zeta': 'ζ', # greek small letter zeta, U+03B6 ISOgrk3
+ 'zwj': '‍', # zero width joiner, U+200D NEW RFC 2070
+ 'zwnj': '‌', # zero width non-joiner, U+200C NEW RFC 2070
+
}
-# Cache lines from files.
-# This is intended to read lines from modules imported -- hence if a filename
-# is not found, it will look down the module search path for a file by
-# that name.
+"""Cache lines from files.
+
+This is intended to read lines from modules imported -- hence if a filename
+is not found, it will look down the module search path for a file by
+that name.
+"""
import sys
import os
from stat import *
def getline(filename, lineno):
- lines = getlines(filename)
- if 1 <= lineno <= len(lines):
- return lines[lineno-1]
- else:
- return ''
+ """Return line 'lineno' (counting from 1) of 'filename'.
+ An out-of-range line number yields the empty string."""
+ lines = getlines(filename)
+ if 1 <= lineno <= len(lines):
+ return lines[lineno-1]
+ else:
+ return ''
# The cache
cache = {} # The cache
-# Clear the cache entirely
-
def clearcache():
- global cache
- cache = {}
+ """Clear the cache entirely."""
+ global cache
+ # Rebinds the module-level name to a fresh dict instead of emptying
+ # the existing dict in place.
+ cache = {}
-# Get the lines for a file from the cache.
-# Update the cache if it doesn't contain an entry for this file already.
def getlines(filename):
- if cache.has_key(filename):
- return cache[filename][2]
- else:
- return updatecache(filename)
+ """Get the lines for a file from the cache.
+ Update the cache if it doesn't contain an entry for this file already."""
+ if cache.has_key(filename):
+ # Cache entries are (size, mtime, lines, fullname) tuples.
+ return cache[filename][2]
+ else:
+ return updatecache(filename)
-# Discard cache entries that are out of date.
-# (This is not checked upon each call!)
def checkcache():
- for filename in cache.keys():
- size, mtime, lines, fullname = cache[filename]
- try:
- stat = os.stat(fullname)
- except os.error:
- del cache[filename]
- continue
- if size <> stat[ST_SIZE] or mtime <> stat[ST_MTIME]:
- del cache[filename]
+ """Discard cache entries that are out of date.
+ (This is not checked upon each call!)"""
+ # cache.keys() is a separate list, so entries may be deleted from
+ # the dict while looping over it.
+ for filename in cache.keys():
+ size, mtime, lines, fullname = cache[filename]
+ try:
+ stat = os.stat(fullname)
+ except os.error:
+ # The file can no longer be stat'ed: drop the entry.
+ del cache[filename]
+ continue
+ if size <> stat[ST_SIZE] or mtime <> stat[ST_MTIME]:
+ del cache[filename]
-# Update a cache entry and return its list of lines.
-# If something's wrong, print a message, discard the cache entry,
-# and return an empty list.
def updatecache(filename):
- if cache.has_key(filename):
- del cache[filename]
- if not filename or filename[0] + filename[-1] == '<>':
- return []
- fullname = filename
- try:
- stat = os.stat(fullname)
- except os.error, msg:
- # Try looking through the module search path
- basename = os.path.split(filename)[1]
- for dirname in sys.path:
- fullname = os.path.join(dirname, basename)
- try:
- stat = os.stat(fullname)
- break
- except os.error:
- pass
- else:
- # No luck
-## print '*** Cannot stat', filename, ':', msg
- return []
- try:
- fp = open(fullname, 'r')
- lines = fp.readlines()
- fp.close()
- except IOError, msg:
-## print '*** Cannot open', fullname, ':', msg
- return []
- size, mtime = stat[ST_SIZE], stat[ST_MTIME]
- cache[filename] = size, mtime, lines, fullname
- return lines
+ """Update a cache entry and return its list of lines.
+ If something's wrong, discard the cache entry and return an empty
+ list.  (The diagnostic print statements are commented out, so
+ failures are silent.)"""
+
+ if cache.has_key(filename):
+ del cache[filename]
+ # Empty names and names of the form <...> are never looked up on disk.
+ if not filename or filename[0] + filename[-1] == '<>':
+ return []
+ fullname = filename
+ try:
+ stat = os.stat(fullname)
+ except os.error, msg:
+ # Try looking through the module search path
+ basename = os.path.split(filename)[1]
+ for dirname in sys.path:
+ fullname = os.path.join(dirname, basename)
+ try:
+ stat = os.stat(fullname)
+ break
+ except os.error:
+ pass
+ else:
+ # No luck
+## print '*** Cannot stat', filename, ':', msg
+ return []
+ try:
+ fp = open(fullname, 'r')
+ lines = fp.readlines()
+ fp.close()
+ except IOError, msg:
+## print '*** Cannot open', fullname, ':', msg
+ return []
+ # The entry is keyed by the name the caller passed in; the path that
+ # was actually opened is kept as the last element.
+ size, mtime = stat[ST_SIZE], stat[ST_MTIME]
+ cache[filename] = size, mtime, lines, fullname
+ return lines
-"""Mac specific module for conversion between pathnames and URLs.
-Do not import directly, use urllib instead."""
+"""Macintosh-specific module for conversion between pathnames and URLs.
+
+Do not import directly; use urllib instead."""
import string
import urllib
tp = urllib.splittype(pathname)[0]
if tp and tp <> 'file':
raise RuntimeError, 'Cannot convert non-local URL to pathname'
+ # Turn starting /// into /, an empty hostname means current host
+ if pathname[:3] == '///':
+ pathname = pathname[2:]
+ elif pathname[:2] == '//':
+ raise RuntimeError, 'Cannot convert non-local URL to pathname'
components = string.split(pathname, '/')
# Remove . and embedded ..
i = 0
--- /dev/null
+"""Generic MIME parser.
+
+Classes:
+
+ MimeParser - Generic MIME parser.
+
+Exceptions:
+
+ MimeError - Exception raised by MimeParser class.
+
+XXX To do:
+
+- Content-transfer-encoding issues
+- Use Content-length header in rawbody()?
+- Cache parts instead of reparsing each time
+- The message strings in exceptions could use some work
+
+"""
+
+from types import * # Python types, not MIME types :-)
+import string
+import regex
+import SubFile
+import mimetools
+
+
+MimeError = "MimeParser.MimeError" # Exception raised by this class
+
+
+class MimeParser:
+
+    """Generic MIME parser.
+
+    This requires a seekable file.
+
+    """
+
+    def __init__(self, fp):
+        """Constructor: store the file pointer and parse the headers."""
+        self._fp = fp
+        self._start = fp.tell()
+        self._headers = h = mimetools.Message(fp)
+        # Position just after the headers, i.e. where the body begins.
+        self._bodystart = fp.tell()
+        self._multipart = h.getmaintype() == 'multipart'
+
+    def multipart(self):
+        """Return whether this is a multipart message."""
+        return self._multipart
+
+    def headers(self):
+        """Return the headers of the MIME message, as a Message object."""
+        return self._headers
+
+    def rawbody(self):
+        """Return the raw body of the MIME message, as a file-like object.
+
+        This is a fairly low-level interface -- for a multipart
+        message, you'd have to parse the body yourself, and it doesn't
+        translate the Content-transfer-encoding.
+
+        """
+        # XXX Use Content-length to set end if it exists?
+        return SubFile.SubFile(self._fp, self._bodystart)
+
+    def body(self):
+        """Return the body of a 1-part MIME message, as a file-like object.
+
+        This should interpret the Content-transfer-encoding, if any
+        (XXX currently it doesn't).
+
+        Raises MimeError when called on a multipart message.
+
+        """
+        if self._multipart:
+            raise MimeError, "body() only works for 1-part messages"
+        return self.rawbody()
+
+    _re_content_length = regex.compile('content-length:[ \t]*\([0-9]+\)',
+                                       regex.casefold)
+
+    def rawparts(self):
+        """Return the raw body parts of a multipart MIME message.
+
+        This returns a list of SubFile() objects corresponding to the
+        parts.  Note that the phantom part before the first separator
+        is returned too, as list item 0.  If the final part is not
+        followed by a terminator, it is ignored, and this error is not
+        reported.  (XXX: the error should be raised).
+
+        The scan always restarts at the beginning of the body, so the
+        method may be called more than once on the same parser.
+
+        Raises MimeError when the message is not multipart or has no
+        boundary parameter.
+
+        """
+        if not self._multipart:
+            raise MimeError, "[raw]parts() only works for multipart messages"
+        h = self._headers
+        separator = h.getparam('boundary')
+        if not separator:
+            raise MimeError, "multipart boundary not specified"
+        separator = "--" + separator
+        terminator = separator + "--"
+        ns = len(separator)
+        parts = []
+        f = self._fp
+        # Rewind to the body: an earlier rawparts()/parts() call (or any
+        # other reader of the file) may have left the position elsewhere.
+        f.seek(self._bodystart)
+        start = f.tell()
+        clength = -1        # Content-length of the current part, -1 if none
+        bodystart = -1      # Offset of the current part's body
+        inheaders = 0       # True while reading a part's header lines
+        while 1:
+            end = f.tell()
+            line = f.readline()
+            if not line:
+                break
+            if line[:2] != "--" or line[:ns] != separator:
+                # Not a boundary line: only part headers need attention.
+                if inheaders:
+                    clre = self._re_content_length
+                    if clre.match(line) > 0:
+                        try:
+                            clength = string.atoi(clre.group(1))
+                        except string.atoi_error:
+                            pass
+                    if not string.strip(line):
+                        # Blank line ends the part's headers.
+                        inheaders = 0
+                        bodystart = f.tell()
+                        if clength > 0:
+                            # Skip binary data
+                            f.read(clength)
+                continue
+            line = string.strip(line)
+            if line == terminator or line == separator:
+                if clength >= 0:
+                    # The Content-length header determines the subfile size
+                    end = bodystart + clength
+                else:
+                    # The final newline is not part of the content
+                    end = end-1
+                parts.append(SubFile.SubFile(f, start, end))
+                start = f.tell()
+                clength = -1
+                inheaders = 1
+                if line == terminator:
+                    break
+        return parts
+
+    def parts(self):
+        """Return the parsed body parts of a multipart MIME message.
+
+        This returns a list of MimeParser() instances corresponding to
+        the parts.  The phantom part before the first separator is not
+        included.
+
+        """
+        return map(MimeParser, self.rawparts()[1:])
+
+    def getsubpartbyposition(self, indices):
+        """Return the nested part selected by a sequence of indices.
+
+        Each index selects an item of parts() at successive nesting
+        levels; an empty sequence returns this parser itself.
+
+        """
+        part = self
+        for i in indices:
+            part = part.parts()[i]
+        return part
+
+    def getsubpartbyid(self, id):
+        """Return the (sub)part whose Content-id header equals id.
+
+        This message is checked first, then its parts recursively.
+        Returns None when no part matches.
+
+        """
+        h = self._headers
+        cid = h.getheader('content-id')
+        if cid and cid == id:
+            return self
+        if self._multipart:
+            for part in self.parts():
+                # parts() already yields MimeParser instances; recurse
+                # on them directly instead of wrapping them again.
+                hit = part.getsubpartbyid(id)
+                if hit:
+                    return hit
+        return None
+
+    def index(self):
+        """Return an index of the MIME file.
+
+        This parses the entire file and returns index information
+        about it, in the form of a tuple
+
+          (ctype, headers, body)
+
+        where 'ctype' is the content type string of the message
+        (e.g. `text/plain' or `multipart/mixed') and 'headers' is a
+        Message instance containing the message headers (which should
+        be treated as read-only).
+
+        The 'body' item depends on the content type:
+
+        - If it is an atomic message (anything except for content type
+          multipart/*), it is the file-like object returned by
+          self.body().
+
+        - For a content type of multipart/*, it is the list of
+          MimeParser() objects returned by self.parts().
+
+        """
+        if self._multipart:
+            body = self.parts()
+        else:
+            body = self.body()
+        return self._headers.gettype(), self._headers, body
+
+
+def _show(parser, level=0):
+ """Helper for _test(): print an outline of the message in 'parser'.
+
+ The content type is printed first.  For a multipart message each
+ part is then numbered (from 1) and shown recursively; for any
+ other message the header and body line counts are printed,
+ followed by the header lines, an empty line and the body lines.
+ 'level' is the nesting depth and controls the indentation.
+
+ """
+ ctype, headers, body = parser.index()
+ print ctype,
+ if type(body) == ListType:
+ nparts = len(body)
+ print "(%d part%s):" % (nparts, nparts != 1 and "s" or "")
+ n = 0
+ for part in body:
+ n = n+1
+ print "%*d." % (4*level+2, n),
+ _show(part, level+1)
+ else:
+ bodylines = body.readlines()
+ print "(%d header lines, %d body lines)" % (
+ len(headers.headers), len(bodylines))
+ for line in headers.headers + ['\n'] + bodylines:
+ if line[-1:] == '\n': line = line[:-1]
+ print " "*level + line
+
+def _test(args = None):
+    """Test program invoked when run as a script.
+
+    When a filename argument is specified, it reads from that file
+    ('-' means stdin).  When no arguments are present, it defaults to
+    'testkp.txt' if it exists, else it defaults to stdin.
+
+    """
+    # Import unconditionally.  An import inside a branch makes the name
+    # local to this function, so e.g. _test(['-']) would reach
+    # sys.stdin without 'sys' ever having been bound.
+    import os
+    import sys
+    if not args:
+        args = sys.argv[1:]
+    if args:
+        fn = args[0]
+    else:
+        fn = 'testkp.txt'
+        if not os.path.exists(fn):
+            fn = '-'
+    if fn == '-':
+        fp = sys.stdin
+    else:
+        fp = open(fn)
+    mp = MimeParser(fp)
+    _show(mp)
+
+if __name__ == '__main__':
+ import sys
+ _test()
-# Various tools used by MIME-reading or MIME-writing programs.
+"""Various tools used by MIME-reading or MIME-writing programs."""
import os
import tempfile
-# A derived class of rfc822.Message that knows about MIME headers and
-# contains some hooks for decoding encoded and multipart messages.
-
class Message(rfc822.Message):
+ """A derived class of rfc822.Message that knows about MIME headers and
+ contains some hooks for decoding encoded and multipart messages."""
def __init__(self, fp, seekable = 1):
rfc822.Message.__init__(self, fp, seekable)
# -----------------
-# Return a random string usable as a multipart boundary.
-# The method used is so that it is *very* unlikely that the same
-# string of characters will every occur again in the Universe,
-# so the caller needn't check the data it is packing for the
-# occurrence of the boundary.
-#
-# The boundary contains dots so you have to quote it in the header.
-
_prefix = None
def choose_boundary():
+ """Return a random string usable as a multipart boundary.
+ The method used is so that it is *very* unlikely that the same
+ string of characters will ever occur again in the Universe,
+ so the caller needn't check the data it is packing for the
+ occurrence of the boundary.
+
+ The boundary contains dots so you have to quote it in the header."""
+
global _prefix
import time
import random
# Subroutines for decoding some common content-transfer-types
def decode(input, output, encoding):
+ """Decode common content-transfer-encodings (base64, quopri, uuencode)."""
if encoding == 'base64':
import base64
return base64.decode(input, output)
if encoding in ('uuencode', 'x-uuencode', 'uue', 'x-uue'):
import uu
return uu.decode(input, output)
+ if encoding in ('7bit', '8bit'):
+ # Identity encodings: copy the data and return, like the other
+ # branches, instead of falling through to the table lookup below.
+ return output.write(input.read())
if decodetab.has_key(encoding):
pipethrough(input, decodetab[encoding], output)
else:
'unknown Content-Transfer-Encoding: %s' % encoding
def encode(input, output, encoding):
+ """Encode common content-transfer-encodings (base64, quopri, uuencode)."""
if encoding == 'base64':
import base64
return base64.encode(input, output)
if encoding in ('uuencode', 'x-uuencode', 'uue', 'x-uue'):
import uu
return uu.encode(input, output)
+ if encoding in ('7bit', '8bit'):
+ # Identity encodings: copy the data and return, like the other
+ # branches, instead of falling through to the table lookup below.
+ return output.write(input.read())
if encodetab.has_key(encoding):
pipethrough(input, encodetab[encoding], output)
else:
knownfiles = [
"/usr/local/etc/httpd/conf/mime.types",
"/usr/local/lib/netscape/mime.types",
- "/usr/local/etc/httpd/conf/mime.types", # Apache 1.2
- "/usr/local/etc/mime.types", # Apache 1.3
+ "/usr/local/etc/httpd/conf/mime.types", # Apache 1.2
+ "/usr/local/etc/mime.types", # Apache 1.3
]
inited = 0
init()
scheme, url = urllib.splittype(url)
if scheme == 'data':
- # syntax of data URLs:
- # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
- # mediatype := [ type "/" subtype ] *( ";" parameter )
- # data := *urlchar
- # parameter := attribute "=" value
- # type/subtype defaults to "text/plain"
- comma = string.find(url, ',')
- if comma < 0:
- # bad data URL
- return None, None
- semi = string.find(url, ';', 0, comma)
- if semi >= 0:
- type = url[:semi]
- else:
- type = url[:comma]
- if '=' in type or '/' not in type:
- type = 'text/plain'
- return type, None # never compressed, so encoding is None
+ # syntax of data URLs:
+ # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
+ # mediatype := [ type "/" subtype ] *( ";" parameter )
+ # data := *urlchar
+ # parameter := attribute "=" value
+ # type/subtype defaults to "text/plain"
+ comma = string.find(url, ',')
+ if comma < 0:
+ # bad data URL
+ return None, None
+ semi = string.find(url, ';', 0, comma)
+ if semi >= 0:
+ type = url[:semi]
+ else:
+ type = url[:comma]
+ if '=' in type or '/' not in type:
+ type = 'text/plain'
+ return type, None # never compressed, so encoding is None
base, ext = posixpath.splitext(url)
while suffix_map.has_key(ext):
base, ext = posixpath.splitext(base + suffix_map[ext])
'.jpe': 'image/jpeg',
'.jpeg': 'image/jpeg',
'.jpg': 'image/jpeg',
+ '.js': 'application/x-javascript',
'.latex': 'application/x-latex',
'.man': 'application/x-troff-man',
'.me': 'application/x-troff-me',
-# A class that makes each part of a multipart message "feel" like an
-# ordinary file, as long as you use fp.readline(). Allows recursive
-# use, for nested multipart messages. Probably best used together
-# with module mimetools.
-#
-# Suggested use:
-#
-# real_fp = open(...)
-# fp = MultiFile(real_fp)
-#
-# "read some lines from fp"
-# fp.push(separator)
-# while 1:
-# "read lines from fp until it returns an empty string" (A)
-# if not fp.next(): break
-# fp.pop()
-# "read remaining lines from fp until it returns an empty string"
-#
-# The latter sequence may be used recursively at (A).
-# It is also allowed to use multiple push()...pop() sequences.
-#
-# If seekable is given as 0, the class code will not do the bookeeping
-# it normally attempts in order to make seeks relative to the beginning of the
-# current file part. This may be useful when using MultiFile with a non-
-# seekable stream object.
+"""A readline()-style interface to the parts of a multipart message.
+
+The MultiFile class makes each part of a multipart message "feel" like
+an ordinary file, as long as you use fp.readline(). Allows recursive
+use, for nested multipart messages. Probably best used together
+with module mimetools.
+
+Suggested use:
+
+real_fp = open(...)
+fp = MultiFile(real_fp)
+
+"read some lines from fp"
+fp.push(separator)
+while 1:
+ "read lines from fp until it returns an empty string" (A)
+ if not fp.next(): break
+fp.pop()
+"read remaining lines from fp until it returns an empty string"
+
+The latter sequence may be used recursively at (A).
+It is also allowed to use multiple push()...pop() sequences.
+
+If seekable is given as 0, the class code will not do the bookkeeping
+it normally attempts in order to make seeks relative to the beginning of the
+current file part. This may be useful when using MultiFile with a non-
+seekable stream object.
+"""
import sys
import string
Error = 'multifile.Error'
class MultiFile:
- #
+
seekable = 0
- #
+
def __init__(self, fp, seekable=1):
self.fp = fp
self.stack = [] # Grows down
self.seekable = 1
self.start = self.fp.tell()
self.posstack = [] # Grows down
- #
+
def tell(self):
if self.level > 0:
return self.lastpos
return self.fp.tell() - self.start
- #
+
def seek(self, pos, whence=0):
here = self.tell()
if whence:
self.fp.seek(pos + self.start)
self.level = 0
self.last = 0
- #
+
def readline(self):
if self.level > 0:
return ''
if self.level > 1:
raise Error,'Missing endmarker in MultiFile.readline()'
return ''
- #
+
def readlines(self):
list = []
while 1:
if not line: break
list.append(line)
return list
- #
+
def read(self): # Note: no size argument -- read until EOF only!
return string.joinfields(self.readlines(), '')
- #
+
def next(self):
while self.readline(): pass
if self.level > 1 or self.last:
if self.seekable:
self.start = self.fp.tell()
return 1
- #
+
def push(self, sep):
if self.level > 0:
raise Error, 'bad MultiFile.push() call'
if self.seekable:
self.posstack.insert(0, self.start)
self.start = self.fp.tell()
- #
+
def pop(self):
if self.stack == []:
raise Error, 'bad MultiFile.pop() call'
del self.posstack[0]
if self.level > 0:
self.lastpos = abslastpos - self.start
- #
+
def is_data(self, line):
return line[:2] <> '--'
- #
+
def section_divider(self, str):
return "--" + str
- #
+
def end_marker(self, str):
return "--" + str + "--"
-#
-# nturl2path convert a NT pathname to a file URL and
-# vice versa
+"""Convert a NT pathname to a file URL and vice versa."""
def url2pathname(url):
""" Convert a URL to a DOS path...
return path
def pathname2url(p):
-
""" Convert a DOS path name to a file url...
C:\foo\bar\spam.foo
-#
-# Start of posixfile.py
-#
-
-#
-# Extended file operations
-#
-# f = posixfile.open(filename, [mode, [bufsize]])
-# will create a new posixfile object
-#
-# f = posixfile.fileopen(fileobject)
-# will create a posixfile object from a builtin file object
-#
-# f.file()
-# will return the original builtin file object
-#
-# f.dup()
-# will return a new file object based on a new filedescriptor
-#
-# f.dup2(fd)
-# will return a new file object based on the given filedescriptor
-#
-# f.flags(mode)
-# will turn on the associated flag (merge)
-# mode can contain the following characters:
-#
-# (character representing a flag)
-# a append only flag
-# c close on exec flag
-# n no delay flag
-# s synchronization flag
-# (modifiers)
-# ! turn flags 'off' instead of default 'on'
-# = copy flags 'as is' instead of default 'merge'
-# ? return a string in which the characters represent the flags
-# that are set
-#
-# note: - the '!' and '=' modifiers are mutually exclusive.
-# - the '?' modifier will return the status of the flags after they
-# have been changed by other characters in the mode string
-#
-# f.lock(mode [, len [, start [, whence]]])
-# will (un)lock a region
-# mode can contain the following characters:
-#
-# (character representing type of lock)
-# u unlock
-# r read lock
-# w write lock
-# (modifiers)
-# | wait until the lock can be granted
-# ? return the first lock conflicting with the requested lock
-# or 'None' if there is no conflict. The lock returned is in the
-# format (mode, len, start, whence, pid) where mode is a
-# character representing the type of lock ('r' or 'w')
-#
-# note: - the '?' modifier prevents a region from being locked; it is
-# query only
-#
+"""Extended file operations available in POSIX.
+
+f = posixfile.open(filename, [mode, [bufsize]])
+ will create a new posixfile object
+
+f = posixfile.fileopen(fileobject)
+ will create a posixfile object from a builtin file object
+
+f.file()
+ will return the original builtin file object
+
+f.dup()
+ will return a new file object based on a new filedescriptor
+
+f.dup2(fd)
+ will return a new file object based on the given filedescriptor
+
+f.flags(mode)
+ will turn on the associated flag (merge)
+ mode can contain the following characters:
+
+ (character representing a flag)
+ a append only flag
+ c close on exec flag
+ n no delay flag
+ s synchronization flag
+ (modifiers)
+ ! turn flags 'off' instead of default 'on'
+ = copy flags 'as is' instead of default 'merge'
+ ? return a string in which the characters represent the flags
+ that are set
+
+ note: - the '!' and '=' modifiers are mutually exclusive.
+ - the '?' modifier will return the status of the flags after they
+ have been changed by other characters in the mode string
+
+f.lock(mode [, len [, start [, whence]]])
+ will (un)lock a region
+ mode can contain the following characters:
+
+ (character representing type of lock)
+ u unlock
+ r read lock
+ w write lock
+ (modifiers)
+ | wait until the lock can be granted
+ ? return the first lock conflicting with the requested lock
+ or 'None' if there is no conflict. The lock returned is in the
+ format (mode, len, start, whence, pid) where mode is a
+ character representing the type of lock ('r' or 'w')
+
+ note: - the '?' modifier prevents a region from being locked; it is
+ query only
+"""
class _posixfile_:
+ """File wrapper class that provides extra POSIX file routines."""
+
states = ['open', 'closed']
#
# additions for AIX by Vladimir.Marangozov@imag.fr
import sys, os
if sys.platform in ('netbsd1',
+ 'openbsd2',
'freebsd2', 'freebsd3',
'bsdos2', 'bsdos3', 'bsdos4'):
flock = struct.pack('lxxxxlxxxxlhh', \
if '?' in how:
if sys.platform in ('netbsd1',
+ 'openbsd2',
'freebsd2', 'freebsd3',
'bsdos2', 'bsdos3', 'bsdos4'):
l_start, l_len, l_pid, l_type, l_whence = \
else:
return 'w', l_len, l_start, l_whence, l_pid
-#
-# Public routine to obtain a posixfile object
-#
def open(name, mode='r', bufsize=-1):
+ """Public routine to open a file as a posixfile object."""
return _posixfile_().open(name, mode, bufsize)
def fileopen(file):
+ """Public routine to get a posixfile object from a Python file object."""
return _posixfile_().fileopen(file)
#
-# Module 'posixpath' -- common operations on Posix pathnames.
-# Some of this can actually be useful on non-Posix systems too, e.g.
-# for manipulation of the pathname component of URLs.
-# The "os.path" name is an alias for this module on Posix systems;
-# on other systems (e.g. Mac, Windows), os.path provides the same
-# operations in a manner specific to that platform, and is an alias
-# to another module (e.g. macpath, ntpath).
-"""Common pathname manipulations, Posix version.
-Instead of importing this module
-directly, import os and refer to this module as os.path.
+"""Common operations on Posix pathnames.
+
+Instead of importing this module directly, import os and refer to
+this module as os.path. The "os.path" name is an alias for this
+module on Posix systems; on other systems (e.g. Mac, Windows),
+os.path provides the same operations in a manner specific to that
+platform, and is an alias to another module (e.g. macpath, ntpath).
+
+Some of this can actually be useful on non-Posix systems too, e.g.
+for manipulation of the pathname component of URLs.
"""
import os
def getatime(filename):
"""Return the last access time of a file, reported by os.stat()."""
st = os.stat(filename)
- return st[stat.ST_MTIME]
+ return st[stat.ST_ATIME]
# Is a path a symbolic link?
# or to impose a different order of visiting.
def walk(top, func, arg):
- """walk(top,func,args) calls func(arg, d, files) for each directory "d"
+ """walk(top,func,arg) calls func(arg, d, files) for each directory "d"
in the tree rooted at "top" (including "top" itself). "files" is a list
of all the files and subdirs in directory "d".
"""
except os.error:
return
func(arg, top, names)
- exceptions = ('.', '..')
for name in names:
- if name not in exceptions:
name = join(top, name)
- if isdir(name) and not islink(name):
+ st = os.lstat(name)
+ if stat.S_ISDIR(st[stat.ST_MODE]):
walk(name, func, arg)
return slashes + string.joinfields(comps, '/')
-# Return an absolute path.
def abspath(path):
+ """Return an absolute path."""
if not isabs(path):
path = join(os.getcwd(), path)
return normpath(path)
MAGIC = imp.get_magic()
def wr_long(f, x):
- "Internal; write a 32-bit int to a file in little-endian order."
+ """Internal; write a 32-bit int to a file in little-endian order."""
f.write(chr( x & 0xff))
f.write(chr((x >> 8) & 0xff))
f.write(chr((x >> 16) & 0xff))
-# A multi-producer, multi-consumer queue.
+"""A multi-producer, multi-consumer queue."""
# define this exception to be compatible with Python 1.5's class
# exceptions, but also when -X option is used.
Full = 'Queue.Full'
class Queue:
- def __init__(self, maxsize):
+ def __init__(self, maxsize=0):
"""Initialize a queue object with a given maximum size.
If maxsize is <= 0, the queue size is infinite.
-# These bits are passed to regex.set_syntax() to choose among
-# alternative regexp syntaxes.
+"""Constants for selecting regexp syntaxes for the obsolete regex module.
+
+This module is only for backward compatibility. "regex" has now
+been replaced by the new regular expression module, "re".
+
+These bits are passed to regex.set_syntax() to choose among
+alternative regexp syntaxes.
+"""
# 1 means plain parentheses serve as grouping, and backslash
# parentheses are needed for literal searching.
--- /dev/null
+"""RFC-822 message manipulation class.
+
+XXX This is only a very rough sketch of a full RFC-822 parser;
+in particular the tokenizing of addresses does not adhere to all the
+quoting rules.
+
+Directions for use:
+
+To create a Message object: first open a file, e.g.:
+ fp = open(file, 'r')
+You can use any other legal way of getting an open file object, e.g. use
+sys.stdin or call os.popen().
+Then pass the open file object to the Message() constructor:
+ m = Message(fp)
+
+This class can work with any input object that supports a readline
+method. If the input object has seek and tell capability, the
+rewindbody method will work; also illegal lines will be pushed back
+onto the input stream. If the input object lacks seek but has an
+`unread' method that can push back a line of input, Message will use
+that to push back illegal lines. Thus this class can be used to parse
+messages coming from a buffered stream.
+
+The optional `seekable' argument is provided as a workaround for
+certain stdio libraries in which tell() discards buffered data before
+discovering that the lseek() system call doesn't work. For maximum
+portability, you should set the seekable argument to zero to prevent
+that initial tell() when passing in an unseekable object such as
+a file object created from a socket object. If it is 1 on entry --
+which it is by default -- the tell() method of the open file object is
+called once; if this raises an exception, seekable is reset to 0. For
+other nonzero values of seekable, this test is not made.
+
+To get the text of a particular header there are several methods:
+ str = m.getheader(name)
+ str = m.getrawheader(name)
+where name is the name of the header, e.g. 'Subject'.
+The difference is that getheader() strips the leading and trailing
+whitespace, while getrawheader() doesn't. Both functions retain
+embedded whitespace (including newlines) exactly as they are
+specified in the header, and leave the case of the text unchanged.
+
+For addresses and address lists there are functions
+ realname, mailaddress = m.getaddr(name) and
+ list = m.getaddrlist(name)
+where the latter returns a list of (realname, mailaddr) tuples.
+
+There is also a method
+ time = m.getdate(name)
+which parses a Date-like field and returns a time-compatible tuple,
+i.e. a tuple such as returned by time.localtime() or accepted by
+time.mktime().
+
+See the class definition for lower level access methods.
+
+There are also some utility functions here.
+"""
+# Cleanup and extensions by Eric S. Raymond <esr@thyrsus.com>
+
+import string
+import time
+
+
+_blanklines = ('\r\n', '\n') # Optimization for islast()
+
+
+class Message:
+ """Represents a single RFC-822-compliant message."""
+
+ def __init__(self, fp, seekable = 1):
+ """Initialize the class instance and read the headers."""
+ if seekable == 1:
+ # Exercise tell() to make sure it works
+ # (and then assume seek() works, too)
+ try:
+ fp.tell()
+ except:
+ seekable = 0
+ else:
+ seekable = 1
+ self.fp = fp
+ self.seekable = seekable
+ self.startofheaders = None
+ self.startofbody = None
+ #
+ if self.seekable:
+ try:
+ self.startofheaders = self.fp.tell()
+ except IOError:
+ self.seekable = 0
+ #
+ self.readheaders()
+ #
+ if self.seekable:
+ try:
+ self.startofbody = self.fp.tell()
+ except IOError:
+ self.seekable = 0
+
+ def rewindbody(self):
+ """Rewind the file to the start of the body (if seekable)."""
+ if not self.seekable:
+ raise IOError, "unseekable file"
+ self.fp.seek(self.startofbody)
+
+ def readheaders(self):
+ """Read header lines.
+
+ Read header lines up to the entirely blank line that
+ terminates them. The (normally blank) line that ends the
+ headers is consumed but not stored in self.headers.
+ If a non-header line ends the headers (which is an error),
+ an attempt is made to push it back onto the input; it is
+ never stored in self.headers.
+
+ The variable self.status is set to the empty string if all
+ went well, otherwise it is an error message.
+ The variable self.headers is a completely uninterpreted list
+ of lines contained in the header (so printing them will
+ reproduce the header exactly as it appears in the file).
+ """
+ self.dict = {}
+ self.__gamh_cache = {}
+ self.__gh_cache = {}
+ self.unixfrom = ''
+ self.headers = list = []
+ self.status = ''
+ headerseen = ""
+ firstline = 1
+ startofline = unread = tell = None
+ if hasattr(self.fp, 'unread'):
+ unread = self.fp.unread
+ elif self.seekable:
+ tell = self.fp.tell
+ while 1:
+ if tell:
+ startofline = tell()
+ line = self.fp.readline()
+ if not line:
+ self.status = 'EOF in headers'
+ break
+ # Skip unix From name time lines
+ if firstline and line[:5] == 'From ':
+ self.unixfrom = self.unixfrom + line
+ continue
+ firstline = 0
+ if headerseen and line[0] in ' \t':
+ # It's a continuation line.
+ list.append(line)
+ self.__gamh_cache[headerseen].append(line)
+ x = string.lstrip(
+ "%s\n %s" % (self.dict[headerseen], string.strip(line)))
+ self.dict[headerseen] = x
+ self.__gh_cache[headerseen][-1] = x
+ continue
+ elif self.iscomment(line):
+ # It's a comment. Ignore it.
+ continue
+ elif self.islast(line):
+ # Note! No pushback here! The delimiter line gets eaten.
+ break
+ headerseen = self.isheader(line)
+ if headerseen:
+ # It's a legal header line, save it.
+ list.append(line)
+ l = self.__gamh_cache.get(headerseen)
+ if not l:
+ self.__gamh_cache[headerseen] = l = []
+ l.append(line)
+ x = string.strip(line[len(headerseen)+1:])
+ self.dict[headerseen] = x
+ l = self.__gh_cache.get(headerseen)
+ if not l:
+ self.__gh_cache[headerseen] = l = []
+ l.append(x)
+ continue
+ else:
+ # It's not a header line; throw it back and stop here.
+ if not self.dict:
+ self.status = 'No headers'
+ else:
+ self.status = 'Non-header line where header expected'
+ # Try to undo the read.
+ if unread:
+ unread(line)
+ elif tell:
+ self.fp.seek(startofline)
+ else:
+ self.status = self.status + '; bad seek'
+ break
+
+ def isheader(self, line):
+ """Determine whether a given line is a legal header.
+
+ This method should return the header name, suitably canonicalized.
+ You may override this method in order to use Message parsing
+ on tagged data in RFC822-like formats with special header formats.
+ """
+ i = string.find(line, ':')
+ if i > 0:
+ return string.lower(line[:i])
+ else:
+ return None
+
+ def islast(self, line):
+ """Determine whether a line is a legal end of RFC-822 headers.
+
+ You may override this method if your application wants
+ to bend the rules, e.g. to strip trailing whitespace,
+ or to recognise MH template separators ('--------').
+ For convenience (e.g. for code reading from sockets) a
+ line consisting of \r\n also matches.
+ """
+ return line in _blanklines
+
+ def iscomment(self, line):
+ """Determine whether a line should be skipped entirely.
+
+ You may override this method in order to use Message parsing
+ on tagged data in RFC822-like formats that support embedded
+ comments or free-text data.
+ """
+ return None
+
+ def getallmatchingheaders(self, name,
+ # speed hack:
+ lower = string.lower):
+ """Find all header lines matching a given header name.
+
+ Look through the list of headers and find all lines
+ matching a given header name (and their continuation
+ lines). A list of the lines is returned, without
+ interpretation. If the header does not occur, an
+ empty list is returned. If the header occurs multiple
+ times, all occurrences are returned. Case is not
+ important in the header name.
+ """
+ r = self.__gamh_cache.get(lower(name))
+ if r:
+ return r[:]
+ return []
+
+ def getfirstmatchingheader(self, name,
+ # speed hack:
+ lower = string.lower):
+ """Get the first header line matching name.
+
+ This is similar to getallmatchingheaders, but it returns
+ only the first matching header (and its continuation
+ lines).
+ """
+ l = self.__gamh_cache.get(lower(name))
+ if not l:
+ return []
+ r = []
+ for item in l:
+ if r and item[0] not in " \t":
+ break
+ r.append(item)
+ return r
+
+ def getrawheader(self, name):
+ """A higher-level interface to getfirstmatchingheader().
+
+ Return a string containing the literal text of the
+ header but with the keyword stripped. All leading,
+ trailing and embedded whitespace is kept in the
+ string, however.
+ Return None if the header does not occur.
+ """
+
+ list = self.getfirstmatchingheader(name)
+ if not list:
+ return None
+ list[0] = list[0][len(name) + 1:]
+ return string.joinfields(list, '')
+
+ def getheader(self, name, default=None):
+ """Get the header value for a name.
+
+ This is the normal interface: it returns a stripped
+ version of the header value for a given header name,
+ or `default' (None unless specified) if it doesn't exist.
+ This uses the dictionary version which finds the *last*
+ such header.
+ """
+ try:
+ return self.dict[string.lower(name)]
+ except KeyError:
+ return default
+ get = getheader
+
+ def getheaders(self, name,
+ # speed hack:
+ lower = string.lower):
+ """Get all values for a header.
+
+ This returns a list of values for headers given more than once;
+ each value in the result list is stripped in the same way as the
+ result of getheader(). If the header is not given, return an
+ empty list.
+ """
+ r = self.__gh_cache.get(lower(name))
+ if r:
+ return r[:]
+ return []
+
+ def getaddr(self, name):
+ """Get a single address from a header, as a tuple.
+
+ An example return value:
+ ('Guido van Rossum', 'guido@cwi.nl')
+ """
+ # New, by Ben Escoto
+ alist = self.getaddrlist(name)
+ if alist:
+ return alist[0]
+ else:
+ return (None, None)
+
+ def getaddrlist(self, name):
+ """Get a list of addresses from a header.
+
+ Retrieves a list of addresses from a header, where each address is a
+ tuple as returned by getaddr(). Scans all named headers, so it works
+ properly with multiple To: or Cc: headers for example.
+
+ """
+ raw = []
+ for h in self.getallmatchingheaders(name):
+ if h[0] in ' \t':
+ raw.append(h)
+ else:
+ if raw:
+ raw.append(', ')
+ i = string.find(h, ':')
+ if i > 0:
+ addr = h[i+1:]
+ raw.append(addr)
+ alladdrs = string.join(raw, '')
+ a = AddrlistClass(alladdrs)
+ return a.getaddrlist()
+
+ def getdate(self, name):
+ """Retrieve a date field from a header.
+
+ Retrieves a date field from the named header, returning
+ a tuple compatible with time.mktime().
+ """
+ try:
+ data = self[name]
+ except KeyError:
+ return None
+ return parsedate(data)
+
+ def getdate_tz(self, name):
+ """Retrieve a date field from a header as a 10-tuple.
+
+ The first 9 elements make up a tuple compatible with
+ time.mktime(), and the 10th is the offset of the poster's
+ time zone from GMT/UTC.
+ """
+ try:
+ data = self[name]
+ except KeyError:
+ return None
+ return parsedate_tz(data)
+
+
+ # Access as a dictionary (only finds *last* header of each type):
+
+ def __len__(self):
+ """Get the number of headers in a message."""
+ return len(self.dict)
+
+ def __getitem__(self, name):
+ """Get a specific header, as from a dictionary."""
+ return self.dict[string.lower(name)]
+
+ def __setitem__(self, name, value):
+ """Set the value of a header.
+
+ Note: This is not a perfect inversion of __getitem__, because
+ any changed headers get stuck at the end of the raw-headers list
+ rather than where the altered header was.
+ """
+ del self[name] # Won't fail if it doesn't exist
+ self.dict[string.lower(name)] = value
+ text = name + ": " + value
+ lines = string.split(text, "\n")
+ for line in lines:
+ self.headers.append(line + "\n")
+
+ def __delitem__(self, name):
+ """Delete all occurrences of a specific header, if it is present."""
+ name = string.lower(name)
+ if not self.dict.has_key(name):
+ return
+ del self.dict[name]
+ name = name + ':'
+ n = len(name)
+ list = []
+ hit = 0
+ for i in range(len(self.headers)):
+ line = self.headers[i]
+ if string.lower(line[:n]) == name:
+ hit = 1
+ elif line[:1] not in string.whitespace:
+ hit = 0
+ if hit:
+ list.append(i)
+ list.reverse()
+ for i in list:
+ del self.headers[i]
+
+ def has_key(self, name):
+ """Determine whether a message contains the named header."""
+ return self.dict.has_key(string.lower(name))
+
+ def keys(self):
+ """Get all of a message's header field names."""
+ return self.dict.keys()
+
+ def values(self):
+ """Get all of a message's header field values."""
+ return self.dict.values()
+
+ def items(self):
+ """Get all of a message's headers.
+
+ Returns a list of name, value tuples.
+ """
+ return self.dict.items()
+
+ def __str__(self):
+ str = ''
+ for hdr in self.headers:
+ str = str + hdr
+ return str
+
+
+# Utility functions
+# -----------------
+
+# XXX Should fix unquote() and quote() to be really conformant.
+# XXX The inverses of the parse functions may also be useful.
+
+
+def unquote(str):
+ """Remove quotes from a string."""
+ if len(str) > 1:
+ if str[0] == '"' and str[-1:] == '"':
+ return str[1:-1]
+ if str[0] == '<' and str[-1:] == '>':
+ return str[1:-1]
+ return str
+
+
+def quote(str):
+ """Add quotes around a string."""
+ return '"%s"' % string.join(
+ string.split(
+ string.join(
+ string.split(str, '\\'),
+ '\\\\'),
+ '"'),
+ '\\"')
+
+
+def parseaddr(address):
+ """Parse an address into a (realname, mailaddr) tuple."""
+ a = AddrlistClass(address)
+ list = a.getaddrlist()
+ if not list:
+ return (None, None)
+ else:
+ return list[0]
+
+
+class AddrlistClass:
+ """Address parser class by Ben Escoto.
+
+ To understand what this class does, it helps to have a copy of
+ RFC-822 in front of you.
+
+ Note: this class interface is deprecated and may be removed in the future.
+ Use rfc822.AddressList instead.
+ """
+
+ def __init__(self, field):
+ """Initialize a new instance.
+
+ `field' is an unparsed address header field, containing
+ one or more addresses.
+ """
+ self.specials = '()<>@,:;.\"[]'
+ self.pos = 0
+ self.LWS = ' \t'
+ self.CR = '\r\n'
+ self.atomends = self.specials + self.LWS + self.CR
+ self.field = field
+ self.commentlist = []
+
+ def gotonext(self):
+ """Parse up to the start of the next address."""
+ while self.pos < len(self.field):
+ if self.field[self.pos] in self.LWS + '\n\r':
+ self.pos = self.pos + 1
+ elif self.field[self.pos] == '(':
+ self.commentlist.append(self.getcomment())
+ else: break
+
+ def getaddrlist(self):
+ """Parse all addresses.
+
+ Returns a list containing all of the addresses.
+ """
+ ad = self.getaddress()
+ if ad:
+ return ad + self.getaddrlist()
+ else: return []
+
+ def getaddress(self):
+ """Parse the next address."""
+ self.commentlist = []
+ self.gotonext()
+
+ oldpos = self.pos
+ oldcl = self.commentlist
+ plist = self.getphraselist()
+
+ self.gotonext()
+ returnlist = []
+
+ if self.pos >= len(self.field):
+ # Bad email address technically, no domain.
+ if plist:
+ returnlist = [(string.join(self.commentlist), plist[0])]
+
+ elif self.field[self.pos] in '.@':
+ # email address is just an addrspec
+ # this isn't very efficient since we start over
+ self.pos = oldpos
+ self.commentlist = oldcl
+ addrspec = self.getaddrspec()
+ returnlist = [(string.join(self.commentlist), addrspec)]
+
+ elif self.field[self.pos] == ':':
+ # address is a group
+ returnlist = []
+
+ fieldlen = len(self.field)
+ self.pos = self.pos + 1
+ while self.pos < len(self.field):
+ self.gotonext()
+ if self.pos < fieldlen and self.field[self.pos] == ';':
+ self.pos = self.pos + 1
+ break
+ returnlist = returnlist + self.getaddress()
+
+ elif self.field[self.pos] == '<':
+ # Address is a phrase then a route addr
+ routeaddr = self.getrouteaddr()
+
+ if self.commentlist:
+ returnlist = [(string.join(plist) + ' (' + \
+ string.join(self.commentlist) + ')', routeaddr)]
+ else: returnlist = [(string.join(plist), routeaddr)]
+
+ else:
+ if plist:
+ returnlist = [(string.join(self.commentlist), plist[0])]
+ elif self.field[self.pos] in self.specials:
+ self.pos = self.pos + 1
+
+ self.gotonext()
+ if self.pos < len(self.field) and self.field[self.pos] == ',':
+ self.pos = self.pos + 1
+ return returnlist
+
+ def getrouteaddr(self):
+ """Parse a route address (Return-path value).
+
+ This method just skips all the route stuff and returns the addrspec.
+ """
+ if self.field[self.pos] != '<':
+ return
+
+ expectroute = 0
+ self.pos = self.pos + 1
+ self.gotonext()
+ adlist = None
+ while self.pos < len(self.field):
+ if expectroute:
+ self.getdomain()
+ expectroute = 0
+ elif self.field[self.pos] == '>':
+ self.pos = self.pos + 1
+ break
+ elif self.field[self.pos] == '@':
+ self.pos = self.pos + 1
+ expectroute = 1
+ elif self.field[self.pos] == ':':
+ self.pos = self.pos + 1
+ expectaddrspec = 1
+ else:
+ adlist = self.getaddrspec()
+ self.pos = self.pos + 1
+ break
+ self.gotonext()
+
+ return adlist
+
+ def getaddrspec(self):
+ """Parse an RFC-822 addr-spec."""
+ aslist = []
+
+ self.gotonext()
+ while self.pos < len(self.field):
+ if self.field[self.pos] == '.':
+ aslist.append('.')
+ self.pos = self.pos + 1
+ elif self.field[self.pos] == '"':
+ aslist.append('"%s"' % self.getquote())
+ elif self.field[self.pos] in self.atomends:
+ break
+ else: aslist.append(self.getatom())
+ self.gotonext()
+
+ if self.pos >= len(self.field) or self.field[self.pos] != '@':
+ return string.join(aslist, '')
+
+ aslist.append('@')
+ self.pos = self.pos + 1
+ self.gotonext()
+ return string.join(aslist, '') + self.getdomain()
+
+ def getdomain(self):
+ """Get the complete domain name from an address."""
+ sdlist = []
+ while self.pos < len(self.field):
+ if self.field[self.pos] in self.LWS:
+ self.pos = self.pos + 1
+ elif self.field[self.pos] == '(':
+ self.commentlist.append(self.getcomment())
+ elif self.field[self.pos] == '[':
+ sdlist.append(self.getdomainliteral())
+ elif self.field[self.pos] == '.':
+ self.pos = self.pos + 1
+ sdlist.append('.')
+ elif self.field[self.pos] in self.atomends:
+ break
+ else: sdlist.append(self.getatom())
+ return string.join(sdlist, '')
+
+ def getdelimited(self, beginchar, endchars, allowcomments = 1):
+ """Parse a header fragment delimited by special characters.
+
+ `beginchar' is the start character for the fragment.
+ If self is not looking at an instance of `beginchar' then
+ getdelimited returns the empty string.
+
+ `endchars' is a sequence of allowable end-delimiting characters.
+ Parsing stops when one of these is encountered.
+
+ If `allowcomments' is non-zero, embedded RFC-822 comments
+ are allowed within the parsed fragment.
+ """
+ if self.field[self.pos] != beginchar:
+ return ''
+
+ slist = ['']
+ quote = 0
+ self.pos = self.pos + 1
+ while self.pos < len(self.field):
+ if quote == 1:
+ slist.append(self.field[self.pos])
+ quote = 0
+ elif self.field[self.pos] in endchars:
+ self.pos = self.pos + 1
+ break
+ elif allowcomments and self.field[self.pos] == '(':
+ slist.append(self.getcomment())
+ elif self.field[self.pos] == '\\':
+ quote = 1
+ else:
+ slist.append(self.field[self.pos])
+ self.pos = self.pos + 1
+
+ return string.join(slist, '')
+
+ def getquote(self):
+ """Get a quote-delimited fragment from self's field."""
+ return self.getdelimited('"', '"\r', 0)
+
+ def getcomment(self):
+ """Get a parenthesis-delimited fragment from self's field."""
+ return self.getdelimited('(', ')\r', 1)
+
+ def getdomainliteral(self):
+ """Parse an RFC-822 domain-literal."""
+ return self.getdelimited('[', ']\r', 0)
+
+ def getatom(self):
+ """Parse an RFC-822 atom."""
+ atomlist = ['']
+
+ while self.pos < len(self.field):
+ if self.field[self.pos] in self.atomends:
+ break
+ else: atomlist.append(self.field[self.pos])
+ self.pos = self.pos + 1
+
+ return string.join(atomlist, '')
+
+ def getphraselist(self):
+ """Parse a sequence of RFC-822 phrases.
+
+ A phrase is a sequence of words, which are in turn either
+ RFC-822 atoms or quoted-strings. Phrases are canonicalized
+ by squeezing all runs of continuous whitespace into one space.
+ """
+ plist = []
+
+ while self.pos < len(self.field):
+ if self.field[self.pos] in self.LWS:
+ self.pos = self.pos + 1
+ elif self.field[self.pos] == '"':
+ plist.append(self.getquote())
+ elif self.field[self.pos] == '(':
+ self.commentlist.append(self.getcomment())
+ elif self.field[self.pos] in self.atomends:
+ break
+ else: plist.append(self.getatom())
+
+ return plist
+
+class AddressList(AddrlistClass):
+ """An AddressList encapsulates a list of parsed RFC822 addresses."""
+ def __init__(self, field):
+ AddrlistClass.__init__(self, field)
+ if field:
+ self.addresslist = self.getaddrlist()
+ else:
+ self.addresslist = []
+
+ def __len__(self):
+ return len(self.addresslist)
+
+ def __str__(self):
+ return string.joinfields(map(dump_address_pair, self.addresslist),", ")
+
+ def __add__(self, other):
+ # Set union
+ newaddr = AddressList(None)
+ newaddr.addresslist = self.addresslist[:]
+ for x in other.addresslist:
+ if not x in self.addresslist:
+ newaddr.addresslist.append(x)
+ return newaddr
+
+ def __sub__(self, other):
+ # Set difference
+ newaddr = AddressList(None)
+ for x in self.addresslist:
+ if not x in other.addresslist:
+ newaddr.addresslist.append(x)
+ return newaddr
+
+ def __getitem__(self, index):
+ # Make indexing, slices, and 'in' work
+ return self.addresslist[index]
+
+def dump_address_pair(pair):
+ """Dump a (name, address) pair in a canonicalized form."""
+ if pair[0]:
+ return '"' + pair[0] + '" <' + pair[1] + '>'
+ else:
+ return pair[1]
+
+# Parse a date field
+
+_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
+ 'aug', 'sep', 'oct', 'nov', 'dec',
+ 'january', 'february', 'march', 'april', 'may', 'june', 'july',
+ 'august', 'september', 'october', 'november', 'december']
+_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
+
+# The timezone table does not include the military time zones defined
+# in RFC822, other than Z. According to RFC1123, the description in
+# RFC822 gets the signs wrong, so we can't rely on any such time
+# zones. RFC1123 recommends that numeric timezone indicators be used
+# instead of timezone names.
+
+_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
+ 'AST': -400, 'ADT': -300, # Atlantic (used in Canada)
+ 'EST': -500, 'EDT': -400, # Eastern
+ 'CST': -600, 'CDT': -500, # Central
+ 'MST': -700, 'MDT': -600, # Mountain
+ 'PST': -800, 'PDT': -700 # Pacific
+ }
+
+
+def parsedate_tz(data):
+ """Convert a date string to a time tuple.
+
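+ The result is a 10-tuple whose last element is the timezone
+ offset in seconds (None if no zone could be determined).
+ Of the military timezones defined in RFC 822, only Z is
+ recognized; numeric offsets and the names in _timezones are
+ also accepted.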
+ """
+ data = string.split(data)
+ if data[0][-1] in (',', '.') or string.lower(data[0]) in _daynames:
+ # There's a dayname here. Skip it
+ del data[0]
+ if len(data) == 3: # RFC 850 date, deprecated
+ stuff = string.split(data[0], '-')
+ if len(stuff) == 3:
+ data = stuff + data[1:]
+ if len(data) == 4:
+ s = data[3]
+ i = string.find(s, '+')
+ if i > 0:
+ data[3:] = [s[:i], s[i+1:]]
+ else:
+ data.append('') # Dummy tz
+ if len(data) < 5:
+ return None
+ data = data[:5]
+ [dd, mm, yy, tm, tz] = data
+ mm = string.lower(mm)
+ if not mm in _monthnames:
+ dd, mm = mm, string.lower(dd)
+ if not mm in _monthnames:
+ return None
+ mm = _monthnames.index(mm)+1
+ if mm > 12: mm = mm - 12
+ if dd[-1] == ',':
+ dd = dd[:-1]
+ i = string.find(yy, ':')
+ if i > 0:
+ yy, tm = tm, yy
+ if yy[-1] == ',':
+ yy = yy[:-1]
+ if yy[0] not in string.digits:
+ yy, tz = tz, yy
+ if tm[-1] == ',':
+ tm = tm[:-1]
+ tm = string.splitfields(tm, ':')
+ if len(tm) == 2:
+ [thh, tmm] = tm
+ tss = '0'
+ elif len(tm) == 3:
+ [thh, tmm, tss] = tm
+ else:
+ return None
+ try:
+ yy = string.atoi(yy)
+ dd = string.atoi(dd)
+ thh = string.atoi(thh)
+ tmm = string.atoi(tmm)
+ tss = string.atoi(tss)
+ except string.atoi_error:
+ return None
+ tzoffset=None
+ tz=string.upper(tz)
+ if _timezones.has_key(tz):
+ tzoffset=_timezones[tz]
+ else:
+ try:
+ tzoffset=string.atoi(tz)
+ except string.atoi_error:
+ pass
+ # Convert a timezone offset into seconds ; -0500 -> -18000
+ if tzoffset:
+ if tzoffset < 0:
+ tzsign = -1
+ tzoffset = -tzoffset
+ else:
+ tzsign = 1
+ tzoffset = tzsign * ( (tzoffset/100)*3600 + (tzoffset % 100)*60)
+ tuple = (yy, mm, dd, thh, tmm, tss, 0, 0, 0, tzoffset)
+ return tuple
+
+
+def parsedate(data):
+ """Convert a time string to a time tuple."""
+ t=parsedate_tz(data)
+ if type(t)==type( () ):
+ return t[:9]
+ else: return t
+
+
+def mktime_tz(data):
+ """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp."""
+ if data[9] is None:
+ # No zone info, so localtime is better assumption than GMT
+ return time.mktime(data[:8] + (-1,))
+ else:
+ t = time.mktime(data[:8] + (0,))
+ return t - data[9] - time.timezone
+
+def formatdate(timeval=None):
+ """Returns time format preferred for Internet standards.
+
+ Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
+ """
+ if timeval is None:
+ timeval = time.time()
+ return "%s" % time.strftime('%a, %d %b %Y %H:%M:%S GMT',
+ time.gmtime(timeval))
+
+
+# When used as a script, run a small test program.
+# The first command line argument, if given, names a file containing
+# one message in RFC-822 format; otherwise $HOME/Mail/inbox/1 is used.
+
+if __name__ == '__main__':
+ import sys, os
+ file = os.path.join(os.environ['HOME'], 'Mail/inbox/1')
+ if sys.argv[1:]: file = sys.argv[1]
+ f = open(file, 'r')
+ m = Message(f)
+ print 'From:', m.getaddr('from')
+ print 'To:', m.getaddrlist('to')
+ print 'Subject:', m.getheader('subject')
+ print 'Date:', m.getheader('date')
+ date = m.getdate_tz('date')
+ if date:
+ print 'ParsedDate:', time.asctime(date[:-1]),
+ hhmmss = date[-1]
+ hhmm, ss = divmod(hhmmss, 60)
+ hh, mm = divmod(hhmm, 60)
+ print "%+03d%02d" % (hh, mm),
+ if ss: print ".%02d" % ss,
+ print
+ else:
+ print 'ParsedDate:', None
+ m.rewindbody()
+ n = 0
+ while f.readline():
+ n = n + 1
+ print 'Lines:', n
+ print '-'*70
+ print 'len =', len(m)
+ if m.has_key('Date'): print 'Date =', m['Date']
+ if m.has_key('X-Nonsense'): pass
+ print 'keys =', m.keys()
+ print 'values =', m.values()
+ print 'items =', m.items()
"""Word completion for GNU readline 2.0.
This requires the latest extension to the readline module (the
-set_completer() function). When completing a simple identifier, it
completes keywords, built-ins and globals in __main__; when completing
NAME.NAME..., it evaluates (!) the expression up to the last dot and
completes its attributes.
Assuming the text is of the form NAME.NAME....[NAME], and is
evaluabable in the globals of __main__, it will be evaluated
and its attributes (as revealed by dir()) are used as possible
- completions.
+ completions. (For class instances, class members are also
+ considered.)
WARNING: this can still invoke arbitrary C code, if an object
with a __getattr__ hook is evaluated.
if not m:
return
expr, attr = m.group(1, 3)
- words = dir(eval(expr, __main__.__dict__))
+ object = eval(expr, __main__.__dict__)
+ words = dir(object)
+ if hasattr(object,'__class__'):
+ words.append('__class__')
+ words = words + get_class_members(object.__class__)
matches = []
n = len(attr)
for word in words:
matches.append("%s.%s" % (expr, word))
return matches
+def get_class_members(klass):
+ ret = dir(klass)
+ if hasattr(klass,'__bases__'):
+ for base in klass.__bases__:
+ ret = ret + get_class_members(base)
+ return ret
+
readline.set_completer(Completer().complete)
--- /dev/null
+"""
+
+Robots.txt file parser class. Accepts a list of lines or robots.txt URL as
+input, builds a set of rules from that list, then answers questions about
+fetchability of other URLs.
+
+"""
+
+class RobotFileParser:
+
+ def __init__(self):
+ self.rules = {}
+ self.debug = 0
+ self.url = ''
+ self.last_checked = 0
+
+ def mtime(self):
+ return self.last_checked
+
+ def modified(self):
+ import time
+ self.last_checked = time.time()
+
+ def set_url(self, url):
+ self.url = url
+
+ def read(self):
+ import urllib
+ self.parse(urllib.urlopen(self.url).readlines())
+
+ def parse(self, lines):
+ """parse the input lines from a robot.txt file"""
+ import string, re
+ active = []
+ for line in lines:
+ if self.debug: print '>', line,
+ # blank line terminates current record
+ if not line[:-1]:
+ active = []
+ continue
+ # remove optional comment and strip line
+ line = string.strip(line[:string.find(line, '#')])
+ if not line:
+ continue
+ line = re.split(' *: *', line)
+ if len(line) == 2:
+ line[0] = string.lower(line[0])
+ if line[0] == 'user-agent':
+ # this record applies to this user agent
+ if self.debug: print '>> user-agent:', line[1]
+ active.append(line[1])
+ if not self.rules.has_key(line[1]):
+ self.rules[line[1]] = []
+ elif line[0] == 'disallow':
+ if line[1]:
+ if self.debug: print '>> disallow:', line[1]
+ for agent in active:
+ self.rules[agent].append(re.compile(line[1]))
+ else:
+ pass
+ for agent in active:
+ if self.debug: print '>> allow', agent
+ self.rules[agent] = []
+ else:
+ if self.debug: print '>> unknown:', line
+
+ self.modified()
+
+ # returns true if agent is allowed to fetch url
+ def can_fetch(self, useragent, url):
+ """using the parsed robots.txt decide if useragent can fetch url"""
+ import urlparse
+ ag = useragent
+ if not self.rules.has_key(ag): ag = '*'
+ if not self.rules.has_key(ag):
+ if self.debug: print '>> allowing', url, 'fetch by', useragent
+ return 1
+ path = urlparse.urlparse(url)[2]
+ for rule in self.rules[ag]:
+ if rule.match(path) is not None:
+ if self.debug: print '>> disallowing', url, 'fetch by', useragent
+ return 0
+ if self.debug: print '>> allowing', url, 'fetch by', useragent
+ return 1
+
+def _test():
+ rp = RobotFileParser()
+ rp.debug = 1
+ rp.set_url('http://www.musi-cal.com/robots.txt')
+ rp.read()
+ print rp.rules
+ print rp.can_fetch('*', 'http://www.musi-cal.com.com/')
+ print rp.can_fetch('Musi-Cal-Robot',
+ 'http://www.musi-cal.com/cgi-bin/event-search?city=San+Francisco')
+
+if __name__ == "__main__":
+ _test()
import os
-import sys
-import time
-import socket
import string
import posixpath
-import SocketServer
import BaseHTTPServer
+import urllib
class SimpleHTTPRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
probably be diagnosed.)
"""
- path = posixpath.normpath(path)
+ path = posixpath.normpath(urllib.unquote(path))
words = string.splitfields(path, '/')
words = filter(None, words)
path = os.getcwd()
def handle_request(self):
"""Handle one request, possibly blocking."""
- request, client_address = self.get_request()
+ try:
+ request, client_address = self.get_request()
+ except socket.error:
+ return
if self.verify_request(request, client_address):
try:
self.process_request(request, client_address)
"""Mix-in class to handle each request in a new process."""
active_children = None
+ max_children = 40
def collect_children(self):
"""Internal routine to wait for died children."""
while self.active_children:
- pid, status = os.waitpid(0, os.WNOHANG)
+ if len(self.active_children) < self.max_children:
+ options = os.WNOHANG
+ else:
+ # If the maximum number of children are already
+ # running, block while waiting for a child to exit
+ options = 0
+ try:
+ pid, status = os.waitpid(0, options)
+ except os.error:
+ pid = None
if not pid: break
self.active_children.remove(pid)
# Child process.
# This must never return, hence os._exit()!
try:
+ self.socket.close()
self.finish_request(request, client_address)
os._exit(0)
except:
class ThreadingMixIn:
-
"""Mix-in class to handle each request in a new thread."""
def process_request(self, request, client_address):
"""Start a new thread to process the request."""
- import thread
- thread.start_new_thread(self.finish_request,
- (request, client_address))
+ import threading
+ t = threading.Thread(target = self.finish_request,
+ args = (request, client_address))
+ t.start()
class ForkingUDPServer(ForkingMixIn, UDPServer): pass
--- /dev/null
+#
+# Secret Labs' Regular Expression Engine
+# $Id$
+#
+# convert template to internal format
+#
+# Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
+#
+# This code can only be used for 1.6 alpha testing. All other use
+# require explicit permission from Secret Labs AB.
+#
+# Portions of this engine have been developed in cooperation with
+# CNRI. Hewlett-Packard provided funding for 1.6 integration and
+# other compatibility work.
+#
+
+# FIXME: <fl> formalize (objectify?) and document the compiler code
+# format, so that other frontends can use this compiler
+
+import array, string, sys
+
+import _sre
+
+from sre_constants import *
+
# pick the array type code whose item size equals the engine's code size
for WORDSIZE in "BHil":
    if array.array(WORDSIZE).itemsize == _sre.getcodesize():
        break
else:
    raise RuntimeError("cannot find a useable array type")
+
+# FIXME: <fl> should move some optimizations from the parser to here!
+
class Code:
    """List-like buffer of code words collected while compiling a pattern.

    Supports len(), indexing, item assignment and append(); todata()
    packs the collected words into a string of WORDSIZE-typed items.
    """

    def __init__(self):
        self.data = []

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        return self.data[index]

    def __setitem__(self, index, code):
        self.data[index] = code

    def append(self, code):
        self.data.append(code)

    def todata(self):
        # print self.data
        packed = array.array(WORDSIZE, self.data)
        return packed.tostring()
+
def _lower(literal):
    """Return literal converted to lower case."""
    # FIXME: should this be _sre._lower(literal) instead?
    return literal.lower()
+
+def _compile(code, pattern, flags):
+ # Append the code words for every (op, av) item of pattern to code.
+ # code needs len(), item assignment and append(); flags is tested with
+ # "in" for the one-letter flags "s" and "i".  Raises SyntaxError for a
+ # repeat of a zero-width item and ValueError for an unknown operator.
+ append = code.append
+ for op, av in pattern:
+ if op is ANY:
+ if "s" in flags:
+ append(CODES[op]) # any character at all!
+ else:
+ # without "s": any character except code 10
+ append(CODES[NOT_LITERAL])
+ append(10)
+ elif op in (SUCCESS, FAILURE):
+ append(CODES[op])
+ elif op is AT:
+ append(CODES[op])
+ append(POSITIONS[av])
+ elif op is BRANCH:
+ append(CODES[op])
+ tail = []
+ # each alternative: a skip slot, its code, then a JUMP whose
+ # operand slot is remembered in tail and patched below
+ for av in av[1]:
+ skip = len(code); append(0)
+ _compile(code, av, flags)
+ append(CODES[JUMP])
+ tail.append(len(code)); append(0)
+ code[skip] = len(code) - skip
+ append(0) # end of branch
+ # patch every JUMP operand with the distance to the end of the branch
+ for tail in tail:
+ code[tail] = len(code) - tail
+ elif op is CALL:
+ append(CODES[op])
+ # skip slot is patched with the length of the called code
+ skip = len(code); append(0)
+ _compile(code, av, flags)
+ append(CODES[SUCCESS])
+ code[skip] = len(code) - skip
+ elif op is CATEGORY: # not used by current parser
+ append(CODES[op])
+ append(CATEGORIES[av])
+ elif op is GROUP:
+ if "i" in flags:
+ append(CODES[MAP_IGNORE[op]])
+ else:
+ append(CODES[op])
+ append(av)
+ elif op is IN:
+ # fixup converts a set member to its code word; with "i" the
+ # member is lowercased first
+ if "i" in flags:
+ append(CODES[MAP_IGNORE[op]])
+ def fixup(literal):
+ return ord(_lower(literal))
+ else:
+ append(CODES[op])
+ fixup = ord
+ skip = len(code); append(0)
+ for op, av in av:
+ append(CODES[op])
+ if op is NEGATE:
+ pass
+ elif op is LITERAL:
+ append(fixup(av))
+ elif op is RANGE:
+ append(fixup(av[0]))
+ append(fixup(av[1]))
+ elif op is CATEGORY:
+ append(CATEGORIES[av])
+ else:
+ raise ValueError, "unsupported set operator"
+ # FAILURE terminates the set; skip covers the whole set
+ append(CODES[FAILURE])
+ code[skip] = len(code) - skip
+ elif op in (LITERAL, NOT_LITERAL):
+ if "i" in flags:
+ append(CODES[MAP_IGNORE[op]])
+ append(ord(_lower(av)))
+ else:
+ append(CODES[op])
+ append(ord(av))
+ elif op is MARK:
+ append(CODES[op])
+ append(av)
+ elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
+ # av is (min, max, item); lo and hi are the width bounds of item
+ lo, hi = av[2].getwidth()
+ if lo == 0:
+ raise SyntaxError, "cannot repeat zero-width items"
+ if lo == hi == 1 and op is MAX_REPEAT:
+ # item always has width 1: use the dedicated opcode
+ append(CODES[MAX_REPEAT_ONE])
+ skip = len(code); append(0)
+ append(av[0])
+ append(av[1])
+ _compile(code, av[2], flags)
+ append(CODES[SUCCESS])
+ code[skip] = len(code) - skip
+ else:
+ append(CODES[op])
+ skip = len(code); append(0)
+ append(av[0])
+ append(av[1])
+ _compile(code, av[2], flags)
+ if op is MIN_REPEAT:
+ append(CODES[MIN_UNTIL])
+ else:
+ # FIXME: MAX_REPEAT PROBABLY DOESN'T WORK (?)
+ append(CODES[MAX_UNTIL])
+ code[skip] = len(code) - skip
+ elif op is SUBPATTERN:
+ # only the contents are compiled; the MARK emission below is
+ # disabled
+## group = av[0]
+## if group:
+## append(CODES[MARK])
+## append((group-1)*2)
+ _compile(code, av[1], flags)
+## if group:
+## append(CODES[MARK])
+## append((group-1)*2+1)
+ else:
+ raise ValueError, ("unsupported operand type", op)
+
+def compile(p, flags=()):
+ # convert pattern list to internal format
+ if type(p) in (type(""), type(u"")):
+ import sre_parse
+ pattern = p
+ p = sre_parse.parse(p)
+ else:
+ pattern = None
+ # print p.getwidth()
+ # print p
+ code = Code()
+ _compile(code, p.data, p.pattern.flags)
+ code.append(CODES[SUCCESS])
+ # print list(code.data)
+ data = code.todata()
+ if 0: # debugging
+ print
+ print "-" * 68
+ import sre_disasm
+ sre_disasm.disasm(data)
+ print "-" * 68
+ # print len(data), p.pattern.groups, len(p.pattern.groupdict)
+ return _sre.compile(pattern, data, p.pattern.groups-1, p.pattern.groupdict)
--- /dev/null
+#
+# Secret Labs' Regular Expression Engine
+# $Id$
+#
+# various symbols used by the regular expression engine.
+# run this script to update the _sre include files!
+#
+# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
+#
+# This code can only be used for 1.6 alpha testing. All other use
+# require explicit permission from Secret Labs AB.
+#
+# Portions of this engine have been developed in cooperation with
+# CNRI. Hewlett-Packard provided funding for 1.6 integration and
+# other compatibility work.
+#
+
# operator names

FAILURE = "failure"
SUCCESS = "success"

ANY = "any"
ASSERT = "assert"
AT = "at"
BRANCH = "branch"
CALL = "call"
CATEGORY = "category"
GROUP = "group"
GROUP_IGNORE = "group_ignore"
IN = "in"
IN_IGNORE = "in_ignore"
JUMP = "jump"
LITERAL = "literal"
LITERAL_IGNORE = "literal_ignore"
MARK = "mark"
MAX_REPEAT = "max_repeat"
MAX_REPEAT_ONE = "max_repeat_one"
MAX_UNTIL = "max_until"
MIN_REPEAT = "min_repeat"
MIN_UNTIL = "min_until"
NEGATE = "negate"
NOT_LITERAL = "not_literal"
NOT_LITERAL_IGNORE = "not_literal_ignore"
RANGE = "range"
REPEAT = "repeat"
SUBPATTERN = "subpattern"

# position names
AT_BEGINNING = "at_beginning"
AT_BOUNDARY = "at_boundary"
AT_NON_BOUNDARY = "at_non_boundary"
AT_END = "at_end"

# category names

CATEGORY_DIGIT = "category_digit"
CATEGORY_NOT_DIGIT = "category_not_digit"
CATEGORY_SPACE = "category_space"
CATEGORY_NOT_SPACE = "category_not_space"
CATEGORY_WORD = "category_word"
CATEGORY_NOT_WORD = "category_not_word"

# Operators in opcode order: an operator's position in this list is its
# opcode number.  failure=0 success=1 (just because it looks better
# that way :-)
CODES = [
    FAILURE, SUCCESS,
    ANY,
    ASSERT,
    AT,
    BRANCH,
    CALL,
    CATEGORY,
    GROUP, GROUP_IGNORE,
    IN, IN_IGNORE,
    JUMP,
    LITERAL, LITERAL_IGNORE,
    MARK,
    MAX_REPEAT, MAX_UNTIL,
    MAX_REPEAT_ONE,
    MIN_REPEAT, MIN_UNTIL,
    NOT_LITERAL, NOT_LITERAL_IGNORE,
    NEGATE,
    RANGE,
    REPEAT,
]

# replace the list by a dictionary mapping operator name -> opcode number
c = {}
i = 0
while i < len(CODES):
    code = CODES[i]
    c[code] = i
    i = i + 1
CODES = c

# operator to use instead when matching in "ignore case" mode
MAP_IGNORE = {
    GROUP: GROUP_IGNORE,
    IN: IN_IGNORE,
    LITERAL: LITERAL_IGNORE,
    NOT_LITERAL: NOT_LITERAL_IGNORE,
}

# position name -> code word
POSITIONS = {
    AT_BEGINNING: ord("a"),
    AT_BOUNDARY: ord("b"),
    AT_NON_BOUNDARY: ord("B"),
    AT_END: ord("z"),
}

# category name -> code word
CATEGORIES = {
    CATEGORY_DIGIT: ord("d"),
    CATEGORY_NOT_DIGIT: ord("D"),
    CATEGORY_SPACE: ord("s"),
    CATEGORY_NOT_SPACE: ord("S"),
    CATEGORY_WORD: ord("w"),
    CATEGORY_NOT_WORD: ord("W"),
}
+
+if __name__ == "__main__":
+ import string
+ items = CODES.items()
+ items.sort(lambda a, b: cmp(a[1], b[1]))
+ f = open("sre_constants.h", "w")
+ f.write("/* generated by sre_constants.py */\n")
+ for k, v in items:
+ f.write("#define SRE_OP_" + string.upper(k) + " " + str(v) + "\n")
+ f.close()
+ print "done"
--- /dev/null
+#
+# Secret Labs' Regular Expression Engine
+# $Id$
+#
+# convert re-style regular expression to SRE template. the current
+# implementation is somewhat incomplete, and not very fast. should
+# definitely be rewritten before Python 1.6 goes beta.
+#
+# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
+#
+# This code can only be used for 1.6 alpha testing. All other use
+# require explicit permission from Secret Labs AB.
+#
+# Portions of this engine have been developed in cooperation with
+# CNRI. Hewlett-Packard provided funding for 1.6 integration and
+# other compatibility work.
+#
+
+# FIXME: comments marked with the FIXME tag are open issues. all such
+# issues should be closed before the final beta.
+
+import string, sys
+
+from sre_constants import *
+
SPECIAL_CHARS = ".\\[{()*+?^$|"
REPEAT_CHARS = "*+?{"

# FIXME: string in tuple tests may explode with if char is unicode :-(
DIGITS = tuple("0123456789")
OCTDIGITS = tuple("01234567")
HEXDIGITS = tuple("0123456789abcdefABCDEF")

# escape sequence -> the single literal character it stands for
ESCAPES = {
    "\\a": (LITERAL, "\a"),
    "\\b": (LITERAL, "\b"),
    "\\f": (LITERAL, "\f"),
    "\\n": (LITERAL, "\n"),
    "\\r": (LITERAL, "\r"),
    "\\t": (LITERAL, "\t"),
    "\\v": (LITERAL, "\v"),
}

# escape sequence -> position assertion or one-category character set
CATEGORIES = {
    "\\A": (AT, AT_BEGINNING), # start of string
    "\\b": (AT, AT_BOUNDARY),
    "\\B": (AT, AT_NON_BOUNDARY),
    "\\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]),
    "\\D": (IN, [(CATEGORY, CATEGORY_NOT_DIGIT)]),
    "\\s": (IN, [(CATEGORY, CATEGORY_SPACE)]),
    "\\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]),
    "\\w": (IN, [(CATEGORY, CATEGORY_WORD)]),
    "\\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]),
    "\\Z": (AT, AT_END), # end of string
}
+
class Pattern:
    """State shared by all subpatterns of one regular expression.

    flags     -- list of the flags set so far, each stored once
    groups    -- id that the next call to getgroup() will hand out
                 (starts at 1)
    groupdict -- maps group names to group ids
    """
    # FIXME: <fl> rename class, and store flags in here too!

    def __init__(self):
        self.flags = []
        self.groups = 1
        self.groupdict = {}

    def getgroup(self, name=None):
        """Allocate and return the next group id.

        When a (non-empty) name is given it is recorded in groupdict.
        """
        gid = self.groups
        self.groups = gid + 1
        if name:
            self.groupdict[name] = gid
        return gid

    def setflag(self, flag):
        """Add flag to self.flags unless it is already there."""
        # The test used to be "if flag in self.flags", which could never
        # add anything to the initially empty list.
        if flag not in self.flags:
            self.flags.append(flag)
+
+class SubPattern:
+ # a subpattern, in intermediate form
+ def __init__(self, pattern, data=None):
+ self.pattern = pattern
+ if not data:
+ data = []
+ self.data = data
+ self.flags = []
+ self.width = None
+ def __repr__(self):
+ return repr(self.data)
+ def __len__(self):
+ return len(self.data)
+ def __delitem__(self, index):
+ del self.data[index]
+ def __getitem__(self, index):
+ return self.data[index]
+ def __setitem__(self, index, code):
+ self.data[index] = code
+ def __getslice__(self, start, stop):
+ return SubPattern(self.pattern, self.data[start:stop])
+ def insert(self, index, code):
+ self.data.insert(index, code)
+ def append(self, code):
+ self.data.append(code)
+ def getwidth(self):
+ # determine the width (min, max) for this subpattern
+ if self.width:
+ return self.width
+ lo = hi = 0L
+ for op, av in self.data:
+ if op is BRANCH:
+ l = sys.maxint
+ h = 0
+ for av in av[1]:
+ i, j = av.getwidth()
+ l = min(l, i)
+ h = min(h, j)
+ lo = lo + i
+ hi = hi + j
+ elif op is CALL:
+ i, j = av.getwidth()
+ lo = lo + i
+ hi = hi + j
+ elif op is SUBPATTERN:
+ i, j = av[1].getwidth()
+ lo = lo + i
+ hi = hi + j
+ elif op in (MIN_REPEAT, MAX_REPEAT):
+ i, j = av[2].getwidth()
+ lo = lo + i * av[0]
+ hi = hi + j * av[1]
+ elif op in (ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY):
+ lo = lo + 1
+ hi = hi + 1
+ elif op == SUCCESS:
+ break
+ self.width = int(min(lo, sys.maxint)), int(min(hi, sys.maxint))
+ return self.width
+ def set(self, flag):
+ if not flag in self.flags:
+ self.flags.append(flag)
+ def reset(self, flag):
+ if flag in self.flags:
+ self.flags.remove(flag)
+
class Tokenizer:
    """Hand out the tokens of a pattern string with one token of lookahead.

    self.next always holds the upcoming token, or None when the input
    is exhausted.  A backslash and the character after it form a single
    token; after "\\x" the following hex digits, and after a backslash
    plus digit the following digits, are included in that token.
    """

    def __init__(self, string):
        self.string = list(string)
        self.next = self.__next()

    def __next(self):
        # remove the next token from the front of self.string and return it
        if not self.string:
            return None
        char = self.string[0]
        if char[0] == "\\":
            try:
                c = self.string[1]
            except IndexError:
                raise SyntaxError("bogus escape")
            char = char + c
            try:
                if c == "x":
                    # hexadecimal constant
                    accepted = HEXDIGITS
                elif str(c) in DIGITS:
                    # decimal (or octal) number
                    # FIXME: if larger than current number of
                    # groups, interpret as an octal number
                    accepted = DIGITS
                else:
                    accepted = None
                if accepted is not None:
                    for i in xrange(2, sys.maxint):
                        c = self.string[i]
                        if str(c) not in accepted:
                            break
                        char = char + c
            except IndexError:
                pass # use what we've got this far
        del self.string[0:len(char)]
        return char

    def match(self, char):
        """Consume the upcoming token and return 1 if it equals char, else return 0."""
        if char != self.next:
            return 0
        self.next = self.__next()
        return 1

    def match_set(self, set):
        """Consume the upcoming token and return 1 if it is in set, else return 0."""
        upcoming = self.next
        if not upcoming or upcoming not in set:
            return 0
        self.next = self.__next()
        return 1

    def get(self):
        """Return the upcoming token (None at end of input) and advance."""
        token = self.next
        self.next = self.__next()
        return token
+
def _fixescape(escape, character_class=0):
    """Convert an escape token to an (op, value) pair.

    Inside a character class (character_class true) ESCAPES is
    consulted before CATEGORIES; outside a class the order is reversed
    and an escape whose tail is an integer becomes (GROUP, number).
    Otherwise "\\x.." and digit escapes yield a LITERAL built from the
    hex or octal value, and any other two-character escape yields its
    second character as a LITERAL.  Raises SyntaxError for anything
    else.
    """
    if character_class:
        tables = (ESCAPES, CATEGORIES)
    else:
        tables = (CATEGORIES, ESCAPES)
    for table in tables:
        code = table.get(escape)
        if code:
            return code
    if not character_class:
        try:
            group = int(escape[1:])
        except ValueError:
            pass
        else:
            # FIXME: only valid if group <= current number of groups
            return GROUP, group
    try:
        marker = escape[1:2]
        if marker == "x":
            # only the last two characters after "\x" are used
            escape = escape[2:]
            return LITERAL, chr(int(escape[-2:], 16) & 0xff)
        if str(marker) in DIGITS:
            return LITERAL, chr(int(escape[1:], 8) & 0xff)
        if len(escape) == 2:
            return LITERAL, escape[1]
    except ValueError:
        pass
    raise SyntaxError("bogus escape: %s" % repr(escape))
+
def _branch(subpattern, items):
    """Append the alternation of items to subpattern.

    Leading elements shared by every item are moved out of the items
    and appended to subpattern first.  If each remaining item is then
    exactly one LITERAL, an (IN, set) is appended; otherwise a
    (BRANCH, (None, items)) is.  The items are modified in place.
    (FIXME: move this optimization to the compiler module!)
    """
    # hoist the common prefix, one element per round
    while 1:
        prefix = None
        shared = 1
        for item in items:
            if not item:
                shared = 0
                break
            if prefix is None:
                prefix = item[0]
            elif item[0] != prefix:
                shared = 0
                break
        if not shared:
            break
        # all subitems start with a common "prefix".
        # move it out of the branch
        for item in items:
            del item[0]
        subpattern.append(prefix)

    # a branch between single literals can be stored as a character
    # set instead (FIXME: use a range if possible)
    literals = []
    for item in items:
        if len(item) == 1 and item[0][0] == LITERAL:
            literals.append(item[0])
        else:
            subpattern.append((BRANCH, (None, items)))
            return
    subpattern.append((IN, literals))
+
+def _parse(source, pattern, flags=()):
+
+ # parse regular expression pattern into an operator list.
+ # source supplies the tokens (its next attribute and its get, match
+ # and match_set methods are used); pattern is the shared state object
+ # that hands out group ids and records flags.  Parsing stops, without
+ # consuming the token, at "|" or ")", or at the end of the input.
+ # Returns a SubPattern; raises SyntaxError on malformed input.
+ # flags is only passed on to the recursive calls.
+
+ subpattern = SubPattern(pattern)
+
+ this = None
+
+ while 1:
+
+ if str(source.next) in ("|", ")"):
+ break # end of subpattern
+ this = source.get()
+ if this is None:
+ break # end of pattern
+
+ if this and this[0] not in SPECIAL_CHARS:
+ subpattern.append((LITERAL, this))
+
+ elif this == "[":
+ # character set
+ set = []
+## if source.match(":"):
+## pass # handle character classes
+ if source.match("^"):
+ set.append((NEGATE, None))
+ # check remaining characters
+ # start is the set before any member was added, so a "]" that
+ # comes first is taken as a literal member, not as the end
+ start = set[:]
+ while 1:
+ this = source.get()
+ if this == "]" and set != start:
+ break
+ elif this and this[0] == "\\":
+ code1 = _fixescape(this, 1)
+ elif this:
+ code1 = LITERAL, this
+ else:
+ raise SyntaxError, "unexpected end of regular expression"
+ if source.match("-"):
+ # potential range
+ this = source.get()
+ if this == "]":
+ set.append(code1)
+ set.append((LITERAL, "-"))
+ break
+ else:
+ # NOTE(review): this is None here if the pattern ends
+ # right after the "-"; this[0] would then fail with a
+ # TypeError rather than a SyntaxError -- confirm
+ if this[0] == "\\":
+ code2 = _fixescape(this, 1)
+ else:
+ code2 = LITERAL, this
+ if code1[0] != LITERAL or code2[0] != LITERAL:
+ raise SyntaxError, "illegal range"
+ if len(code1[1]) != 1 or len(code2[1]) != 1:
+ raise SyntaxError, "illegal range"
+ set.append((RANGE, (code1[1], code2[1])))
+ else:
+ # an (IN, [item]) result is unwrapped to its single item
+ if code1[0] is IN:
+ code1 = code1[1][0]
+ set.append(code1)
+
+ # FIXME: <fl> move set optimization to support function
+ if len(set)==1 and set[0][0] is LITERAL:
+ subpattern.append(set[0]) # optimization
+ elif len(set)==2 and set[0][0] is NEGATE and set[1][0] is LITERAL:
+ subpattern.append((NOT_LITERAL, set[1][1])) # optimization
+ else:
+ # FIXME: <fl> add charmap optimization
+ subpattern.append((IN, set))
+
+ elif this and this[0] in REPEAT_CHARS:
+ # repeat previous item
+ if this == "?":
+ min, max = 0, 1
+ elif this == "*":
+ min, max = 0, sys.maxint
+ elif this == "+":
+ min, max = 1, sys.maxint
+ elif this == "{":
+ # {lo}, {lo,hi}, {lo,} or {,hi}; a missing bound keeps the
+ # default of 0 or sys.maxint
+ min, max = 0, sys.maxint
+ lo = hi = ""
+ while str(source.next) in DIGITS:
+ lo = lo + source.get()
+ if source.match(","):
+ while str(source.next) in DIGITS:
+ hi = hi + source.get()
+ else:
+ hi = lo
+ if not source.match("}"):
+ raise SyntaxError, "bogus range"
+ if lo:
+ min = int(lo)
+ if hi:
+ max = int(hi)
+ # FIXME: <fl> check that hi >= lo!
+ else:
+ raise SyntaxError, "not supported"
+ # figure out which item to repeat
+ # FIXME: should back up to the right mark, right?
+ if subpattern:
+ # step back over trailing MARK items; the repeated item is
+ # wrapped as a one-element slice (a SubPattern)
+ index = len(subpattern)-1
+ while subpattern[index][0] is MARK:
+ index = index - 1
+ item = subpattern[index:index+1]
+ else:
+ raise SyntaxError, "nothing to repeat"
+ # a "?" after the repeat operator selects the MIN_REPEAT form
+ if source.match("?"):
+ subpattern[index] = (MIN_REPEAT, (min, max, item))
+ else:
+ subpattern[index] = (MAX_REPEAT, (min, max, item))
+ elif this == ".":
+ subpattern.append((ANY, None))
+ elif this == "(":
+ # group: 1 = group with an id, 2 = non-capturing, 0 = "(?"
+ # construct without contents to parse
+ group = 1
+ name = None
+ if source.match("?"):
+ group = 0
+ # options
+ if source.match("P"):
+ # named group: skip forward to end of name
+ if source.match("<"):
+ name = ""
+ while 1:
+ char = source.get()
+ if char is None or char == ">":
+ break
+ name = name + char
+ group = 1
+ elif source.match(":"):
+ # non-capturing group
+ group = 2
+ elif source.match_set("iI"):
+ pattern.setflag("i")
+ elif source.match_set("lL"):
+ pattern.setflag("l")
+ elif source.match_set("mM"):
+ pattern.setflag("m")
+ elif source.match_set("sS"):
+ pattern.setflag("s")
+ elif source.match_set("xX"):
+ pattern.setflag("x")
+ if group:
+ # parse group contents
+ b = []
+ if group == 2:
+ # anonymous group
+ group = None
+ else:
+ group = pattern.getgroup(name)
+ # the group contents are bracketed by MARK items numbered
+ # (group-1)*2 and (group-1)*2+1
+ if group:
+ subpattern.append((MARK, (group-1)*2))
+ while 1:
+ p = _parse(source, pattern, flags)
+ if source.match(")"):
+ if b:
+ b.append(p)
+ _branch(subpattern, b)
+ else:
+ subpattern.append((SUBPATTERN, (group, p)))
+ break
+ elif source.match("|"):
+ b.append(p)
+ else:
+ raise SyntaxError, "group not properly closed"
+ if group:
+ subpattern.append((MARK, (group-1)*2+1))
+ else:
+ # nothing to parse: discard tokens up to and including ")"
+ # FIXME: should this really be a while loop?
+ while 1:
+ char = source.get()
+ if char is None or char == ")":
+ break
+
+ elif this == "^":
+ subpattern.append((AT, AT_BEGINNING))
+
+ elif this == "$":
+ subpattern.append((AT, AT_END))
+
+ elif this and this[0] == "\\":
+ code =_fixescape(this)
+ subpattern.append(code)
+
+ else:
+ raise SyntaxError, "parser error"
+
+ return subpattern
+
def parse(source, flags=()):
    """Parse the pattern string source and return a SubPattern.

    Top-level alternatives separated by "|" are collected and combined
    with _branch().  Raises SyntaxError for an unmatched ")" or for any
    other token left over after a subpattern.  flags is handed on to
    _parse() unchanged.
    """
    tokens = Tokenizer(source)
    state = Pattern()
    alternatives = []
    while 1:
        p = _parse(tokens, state, flags)
        tail = tokens.get()
        if tail == "|":
            alternatives.append(p)
            continue
        if tail == ")":
            raise SyntaxError("unbalanced parenthesis")
        if tail is not None:
            raise SyntaxError("bogus characters at end of regular expression")
        break
    if alternatives:
        alternatives.append(p)
        p = SubPattern(state)
        _branch(p, alternatives)
    return p
+
+if __name__ == "__main__":
+ from pprint import pprint
+ from testpatterns import PATTERNS
+ a = b = c = 0
+ for pattern, flags in PATTERNS:
+ if flags:
+ continue
+ print "-"*68
+ try:
+ p = parse(pattern)
+ print repr(pattern), "->"
+ pprint(p.data)
+ import sre_compile
+ try:
+ code = sre_compile.compile(p)
+ c = c + 1
+ except:
+ pass
+ a = a + 1
+ except SyntaxError, v:
+ print "**", repr(pattern), v
+ b = b + 1
+ print "-"*68
+ print a, "of", b, "patterns successfully parsed"
+ print c, "of", b, "patterns successfully compiled"
+
-# Module 'statcache'
-#
-# Maintain a cache of file stats.
-# There are functions to reset the cache or to selectively remove items.
+"""Maintain a cache of stat() information on files.
+
+There are functions to reset the cache or to selectively remove items.
+"""
import os
from stat import *
cache = {}
-# Stat a file, possibly out of the cache.
-#
def stat(path):
+ """Stat a file, possibly out of the cache."""
if cache.has_key(path):
return cache[path]
cache[path] = ret = os.stat(path)
return ret
-# Reset the cache completely.
-#
def reset():
+ """Reset the cache completely."""
global cache
cache = {}
-# Remove a given item from the cache, if it exists.
-#
def forget(path):
+ """Remove a given item from the cache, if it exists."""
if cache.has_key(path):
del cache[path]
-# Remove all pathnames with a given prefix.
-#
def forget_prefix(prefix):
+ """Remove all pathnames with a given prefix."""
n = len(prefix)
for path in cache.keys():
if path[:n] == prefix:
del cache[path]
-# Forget about a directory and all entries in it, but not about
-# entries in subdirectories.
-#
def forget_dir(prefix):
+ """Forget about a directory and all entries in it, but not about
+ entries in subdirectories."""
if prefix[-1:] == '/' and prefix <> '/':
prefix = prefix[:-1]
forget(prefix)
del cache[path]
-# Remove all pathnames except with a given prefix.
-# Normally used with prefix = '/' after a chdir().
-#
def forget_except_prefix(prefix):
+ """Remove all pathnames except with a given prefix.
+ Normally used with prefix = '/' after a chdir()."""
n = len(prefix)
for path in cache.keys():
if path[:n] <> prefix:
del cache[path]
-# Check for directory.
-#
def isdir(path):
+ """Check for directory."""
try:
st = stat(path)
except os.error:
-# class StringIO implements file-like objects that read/write a
-# string buffer (a.k.a. "memory files").
-#
-# This implements (nearly) all stdio methods.
-#
-# f = StringIO() # ready for writing
-# f = StringIO(buf) # ready for reading
-# f.close() # explicitly release resources held
-# flag = f.isatty() # always false
-# pos = f.tell() # get current position
-# f.seek(pos) # set current position
-# f.seek(pos, mode) # mode 0: absolute; 1: relative; 2: relative to EOF
-# buf = f.read() # read until EOF
-# buf = f.read(n) # read up to n bytes
-# buf = f.readline() # read until end of line ('\n') or EOF
-# list = f.readlines()# list of f.readline() results until EOF
-# f.write(buf) # write at current position
-# f.writelines(list) # for line in list: f.write(line)
-# f.getvalue() # return whole file's contents as a string
-#
-# Notes:
-# - Using a real file is often faster (but less convenient).
-# - fileno() is left unimplemented so that code which uses it triggers
-# an exception early.
-# - Seeking far beyond EOF and then writing will insert real null
-# bytes that occupy space in the buffer.
-# - There's a simple test set (see end of this file).
+"""File-like objects that read from or write to a string buffer.
+
+This implements (nearly) all stdio methods.
+
+f = StringIO() # ready for writing
+f = StringIO(buf) # ready for reading
+f.close() # explicitly release resources held
+flag = f.isatty() # always false
+pos = f.tell() # get current position
+f.seek(pos) # set current position
+f.seek(pos, mode) # mode 0: absolute; 1: relative; 2: relative to EOF
+buf = f.read() # read until EOF
+buf = f.read(n) # read up to n bytes
+buf = f.readline() # read until end of line ('\n') or EOF
+list = f.readlines()# list of f.readline() results until EOF
+f.write(buf) # write at current position
+f.writelines(list) # for line in list: f.write(line)
+f.getvalue() # return whole file's contents as a string
+
+Notes:
+- Using a real file is often faster (but less convenient).
+- There's also a much faster implementation in C, called cStringIO, but
+ it's not subclassable.
+- fileno() is left unimplemented so that code which uses it triggers
+ an exception early.
+- Seeking far beyond EOF and then writing will insert real null
+ bytes that occupy space in the buffer.
+- There's a simple test set (see end of this file).
+"""
import string
--- /dev/null
+# module 'string' -- A collection of string operations
+
+# Warning: most of the code you see here isn't normally used nowadays. With
+# Python 1.6, many of these functions are implemented as methods on the
+# standard string object. They used to be implemented by a built-in module
+# called strop, but strop is now obsolete itself.
+
+"""Common string manipulations.
+
+Public module variables:
+
+whitespace -- a string containing all characters considered whitespace
+lowercase -- a string containing all characters considered lowercase letters
+uppercase -- a string containing all characters considered uppercase letters
+letters -- a string containing all characters considered letters
+digits -- a string containing all characters considered decimal digits
+hexdigits -- a string containing all characters considered hexadecimal digits
+octdigits -- a string containing all characters considered octal digits
+
+"""
+
# Character sets for ctype-style character classification
whitespace = ' \t\n\r\v\f'
lowercase = 'abcdefghijklmnopqrstuvwxyz'
uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
letters = lowercase + uppercase
digits = '0123456789'
hexdigits = digits + 'abcdef' + 'ABCDEF'
octdigits = '01234567'

# Case conversion helper: the 256 characters chr(0)..chr(255) in order
_idmap = ''.join(map(chr, range(256)))

# Backward compatible names for exceptions
index_error = atoi_error = atof_error = atol_error = ValueError
+
+# convert UPPER CASE letters to lower case
def lower(s):
    """lower(s) -> string

    Return the result of s.lower(): a copy of s converted to
    lowercase.

    """
    lowered = s.lower()
    return lowered
+
+# Convert lower case letters to UPPER CASE
def upper(s):
    """upper(s) -> string

    Return the result of s.upper(): a copy of s converted to
    uppercase.

    """
    raised = s.upper()
    return raised
+
+# Swap lower case letters and UPPER CASE
def swapcase(s):
    """swapcase(s) -> string

    Return the result of s.swapcase(): a copy of s in which upper
    case characters are lowercase and vice versa.

    """
    swapped = s.swapcase()
    return swapped
+
+# Strip leading and trailing tabs and spaces
def strip(s):
    """strip(s) -> string

    Return the result of s.strip(): a copy of s without leading and
    trailing whitespace.

    """
    stripped = s.strip()
    return stripped
+
+# Strip leading tabs and spaces
def lstrip(s):
    """lstrip(s) -> string

    Return the result of s.lstrip(): a copy of s without leading
    whitespace.

    """
    stripped = s.lstrip()
    return stripped
+
+# Strip trailing tabs and spaces
def rstrip(s):
    """rstrip(s) -> string

    Return the result of s.rstrip(): a copy of s without trailing
    whitespace.

    """
    stripped = s.rstrip()
    return stripped
+
+
+# Split a string into a list of space/tab-separated words
+# NB: split(s) is NOT the same as splitfields(s, ' ')!
def split(s, sep=None, maxsplit=0):
    """split(str [,sep [,maxsplit]]) -> list of strings

    Return the list of words produced by s.split(sep, maxsplit).
    sep is the delimiter string; when it is not specified, any
    whitespace string is a separator.  maxsplit defaults to 0 and is
    handed to the string method unchanged.

    (split and splitfields are synonymous)

    """
    words = s.split(sep, maxsplit)
    return words
splitfields = split
+
+# Join fields with optional separator
def join(words, sep = ' '):
    """join(list [,sep]) -> string

    Return sep.join(words): the words in list concatenated, with sep
    between neighbouring words.  The default separator is a single
    space.

    (joinfields and join are synonymous)

    """
    joined = sep.join(words)
    return joined
joinfields = join
+
# module-level alias of the builtin, for a little bit of speed
_apply = apply

# Find substring, raise exception if not found
def index(s, *args):
    """index(s, sub [,start [,end]]) -> int

    Call s.index() with the remaining arguments.  Like find, but
    raises ValueError when the substring is not found.

    """
    method = s.index
    return _apply(method, args)
+
+# Find last substring, raise exception if not found
def rindex(s, *args):
    """rindex(s, sub [,start [,end]]) -> int

    Call s.rindex() with the remaining arguments.  Like rfind, but
    raises ValueError when the substring is not found.

    """
    method = s.rindex
    return _apply(method, args)
+
+# Count non-overlapping occurrences of substring
def count(s, *args):
    """count(s, sub[, start[,end]]) -> int

    Call s.count() with the remaining arguments: return the number
    of occurrences of substring sub in s[start:end].  Optional
    arguments start and end are interpreted as in slice notation.

    """
    method = s.count
    return _apply(method, args)
+
+# Find substring, return -1 if not found
def find(s, *args):
    """find(s, sub [,start [,end]]) -> int

    Call s.find() with the remaining arguments: return the lowest
    index in s where substring sub is found, such that sub is
    contained within s[start:end].  Optional arguments start and end
    are interpreted as in slice notation.

    Return -1 on failure.

    """
    method = s.find
    return _apply(method, args)
+
+# Find last substring, return -1 if not found
def rfind(s, *args):
    """rfind(s, sub [,start [,end]]) -> int

    Call s.rfind() with the remaining arguments: return the highest
    index in s where substring sub is found, such that sub is
    contained within s[start:end].  Optional arguments start and end
    are interpreted as in slice notation.

    Return -1 on failure.

    """
    method = s.rfind
    return _apply(method, args)
+
# module-level aliases of builtins, for a bit of speed
_float = float
_int = int
_long = long
_StringType = type('')

# Convert string to float
def atof(s):
    """atof(s) -> float

    Return the floating point number represented by the string s.
    Raises TypeError when s is not a string.

    """
    if type(s) != _StringType:
        raise TypeError('argument 1: expected string, %s found' %
                        type(s).__name__)
    return _float(s)
+
+# Convert string to integer
def atoi(*args):
    """atoi(s [,base]) -> int

    Return the integer represented by the string s in the given
    base, which defaults to 10.  The string s must consist of one
    or more digits, possibly preceded by a sign.  If base is 0, it
    is chosen from the leading characters of s, 0 for octal, 0x or
    0X for hexadecimal.  If base is 16, a preceding 0x or 0X is
    accepted.

    Raises TypeError when called without arguments or when s is not
    a string.

    """
    if not args:
        raise TypeError('function requires at least 1 argument: %d given' %
                        len(args))
    s = args[0]
    if type(s) != _StringType:
        raise TypeError('argument 1: expected string, %s found' %
                        type(s).__name__)
    # Don't catch type error resulting from too many arguments to int().  The
    # error message isn't compatible but the error type is, and this function
    # is complicated enough already.
    return _apply(_int, args)
+
+
+# Convert string to long integer
def atol(*args):
    """atol(s [,base]) -> long

    Return the long integer represented by the string s in the
    given base, which defaults to 10.  The string s must consist
    of one or more digits, possibly preceded by a sign.  If base
    is 0, it is chosen from the leading characters of s, 0 for
    octal, 0x or 0X for hexadecimal.  If base is 16, a preceding
    0x or 0X is accepted.  A trailing L or l is not accepted,
    unless base is 0.

    Raises TypeError when called without arguments or when s is not
    a string.

    """
    if not args:
        raise TypeError('function requires at least 1 argument: %d given' %
                        len(args))
    s = args[0]
    if type(s) != _StringType:
        raise TypeError('argument 1: expected string, %s found' %
                        type(s).__name__)
    # Don't catch type error resulting from too many arguments to long().  The
    # error message isn't compatible but the error type is, and this function
    # is complicated enough already.
    return _apply(_long, args)
+
+
+# Left-justify a string
+def ljust(s, width):
+    """ljust(s, width) -> string
+
+    Pad s on the right with spaces until it is width characters
+    long.  A string that is already at least that long is returned
+    unchanged; nothing is ever cut off.
+
+    """
+    padding = width - len(s)
+    if padding > 0:
+        return s + ' ' * padding
+    return s
+
+# Right-justify a string
+def rjust(s, width):
+    """rjust(s, width) -> string
+
+    Pad s on the left with spaces until it is width characters
+    long.  A string that is already at least that long is returned
+    unchanged; nothing is ever cut off.
+
+    """
+    padding = width - len(s)
+    if padding > 0:
+        return ' ' * padding + s
+    return s
+
+# Center a string
+def center(s, width):
+ """center(s, width) -> string
+
+ Return a centered version of s, in a field of the specified
+ width, padded with spaces as needed. The string is never
+ truncated.
+
+ """
+ # n is the total number of spaces to distribute around s.
+ n = width - len(s)
+ if n <= 0: return s
+ # half is the left-hand padding; the right side gets the rest.
+ half = n/2
+ if n%2 and width%2:
+ # This ensures that center(center(s, i), j) = center(s, j)
+ half = half+1
+ return ' '*half + s + ' '*(n-half)
+
+# Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03'
+# Decadent feature: the argument may be a string or a number
+# (Use of this is deprecated; it should be a string, as with ljust etc.)
+def zfill(x, width):
+    """zfill(x, width) -> string
+
+    Pad a numeric string x with zeros on the left, to fill a field
+    of the specified width.  The string x is never truncated.  A
+    leading '+' or '-' sign is kept in front of the inserted zeros.
+
+    If x is not a string it is first converted with repr(); this
+    use is deprecated.  An empty string is padded like any other,
+    e.g. zfill('', 3) returns '000'.
+
+    """
+    if type(x) == type(''): s = x
+    else: s = repr(x)
+    n = len(s)
+    if n >= width: return s
+    sign = ''
+    # Slice instead of indexing: s may be empty here (n == 0 < width),
+    # and s[0] would raise IndexError in that case.
+    if s[:1] in ('-', '+'):
+        sign, s = s[0], s[1:]
+    return sign + '0'*(width-n) + s
+
+# Expand tabs in a string.
+# Doesn't take non-printing chars into account, but does understand \n.
+def expandtabs(s, tabsize=8):
+ """expandtabs(s [,tabsize]) -> string
+
+ Return a copy of the string s with all tab characters replaced
+ by the appropriate number of spaces, depending on the current
+ column, and the tabsize (default 8).
+
+ The column count starts again after every newline character.
+
+ """
+ # res collects finished lines; line is the one currently being built.
+ res = line = ''
+ for c in s:
+ if c == '\t':
+ # Replace the tab by enough spaces to reach the next tab stop.
+ # NOTE(review): tabsize == 0 would make this modulo raise
+ # ZeroDivisionError -- confirm callers never pass 0.
+ c = ' '*(tabsize - len(line) % tabsize)
+ line = line + c
+ if c == '\n':
+ res = res + line
+ line = ''
+ return res + line
+
+# Character translation through look-up table.
+def translate(s, table, deletions=""):
+ """translate(s,table [,deletechars]) -> string
+
+ Return a copy of the string s, where all characters occurring
+ in the optional argument deletechars are removed, and the
+ remaining characters have been mapped through the given
+ translation table, which must be a string of length 256.
+
+ This is a thin wrapper that calls s.translate(table, deletions).
+
+ """
+ return s.translate(table, deletions)
+
+# Capitalize a string, e.g. "aBc dEf" -> "Abc def".
+def capitalize(s):
+ """capitalize(s) -> string
+
+ Return a copy of the string s with only its first character
+ capitalized.
+
+ This is a thin wrapper that calls s.capitalize().
+
+ """
+ return s.capitalize()
+
+# Capitalize the words in a string, e.g. " aBc dEf " -> "Abc Def".
+# See also regsub.capwords().
+def capwords(s, sep=None):
+ """capwords(s, [sep]) -> string
+
+ Split the argument into words using split, capitalize each
+ word using capitalize, and join the capitalized words using
+ join. Note that this replaces runs of whitespace characters by
+ a single space.
+
+ If sep is given (and true) it is passed to split and is also
+ used as the string that joins the words again; otherwise the
+ words are joined with a single space.
+
+ """
+ return join(map(capitalize, s.split(sep)), sep or ' ')
+
+# Construct a translation string
+# Cached list form of _idmap; filled in on the first maketrans() call.
+_idmapL = None
+def maketrans(fromstr, tostr):
+ """maketrans(frm, to) -> string
+
+ Return a translation table (a string of 256 bytes long)
+ suitable for use in string.translate. The strings frm and to
+ must be of the same length.
+
+ Raise ValueError if the two arguments differ in length.
+
+ """
+ if len(fromstr) != len(tostr):
+ raise ValueError, "maketrans arguments must have same length"
+ global _idmapL
+ # Convert _idmap to a list only once and keep it in _idmapL.
+ # (_idmap is defined outside this chunk; presumably the 256-character
+ # identity mapping -- confirm.)
+ if not _idmapL:
+ _idmapL = map(None, _idmap)
+ # Work on a copy so the cached list itself is never modified.
+ L = _idmapL[:]
+ fromstr = map(ord, fromstr)
+ # Overwrite the slot of each source character with its replacement.
+ for i in range(len(fromstr)):
+ L[fromstr[i]] = tostr[i]
+ return joinfields(L, "")
+
+# Substring replacement (global)
+def replace(s, old, new, maxsplit=0):
+ """replace (str, old, new[, maxsplit]) -> string
+
+ Return a copy of string str with all occurrences of substring
+ old replaced by new. If the optional argument maxsplit is
+ given, only the first maxsplit occurrences are replaced.
+
+ """
+ # NOTE(review): the default maxsplit of 0 is passed straight through;
+ # confirm that s.replace() treats 0 as "no limit" rather than
+ # "replace nothing".
+ return s.replace(old, new, maxsplit)
+
+
+# XXX: transitional
+#
+# If string objects do not have methods, then we need to use the old string.py
+# library, which uses strop for many more things than just the few outlined
+# below.
+try:
+ ''.upper
+except AttributeError:
+ from stringold import *
+
+# Try importing optional built-in module "strop" -- if it exists,
+# it redefines some string operations that are 100-1000 times faster.
+# It also defines values for whitespace, lowercase and uppercase
+# that match <ctype.h>'s definitions.
+
+try:
+ from strop import maketrans, lowercase, uppercase, whitespace
+ letters = lowercase + uppercase
+except ImportError:
+ pass # Use the original versions
opt = self.rawq_getchar()
self.msg('IAC %s %d',
c == WILL and 'WILL' or 'WONT', ord(c))
+ self.sock.send(IAC + DONT + opt)
else:
self.msg('IAC %s not recognized' % `c`)
except EOFError: # raised by self.rawq_getchar()
-#! /usr/bin/env python
-"""Test script for the binascii C module
-
- Uses the mechanism of the python binhex module
- Roger E. Masse
-"""
-import binhex
-import tempfile
+"""Test the binascii C module."""
+
from test_support import verbose
+import binascii
+
+# Show module doc string
+print binascii.__doc__
+
+# Show module exceptions
+print binascii.Error
+print binascii.Incomplete
+
+# Check presence and display doc strings of all functions
+funcs = []
+for suffix in "base64", "hqx", "uu":
+ prefixes = ["a2b_", "b2a_"]
+ if suffix == "hqx":
+ prefixes.extend(["crc_", "rlecode_", "rledecode_"])
+ for prefix in prefixes:
+ name = prefix + suffix
+ funcs.append(getattr(binascii, name))
+for func in funcs:
+ print "%-15s: %s" % (func.__name__, func.__doc__)
+
+# Create binary test data
+testdata = "The quick brown fox jumps over the lazy dog.\r\n"
+for i in range(256):
+ # Be slow so we don't depend on other modules
+ testdata = testdata + chr(i)
+testdata = testdata + "\r\nHello world.\n"
+
+# Test base64 with valid data
+print "base64 test"
+MAX_BASE64 = 57
+lines = []
+for i in range(0, len(testdata), MAX_BASE64):
+ b = testdata[i:i+MAX_BASE64]
+ a = binascii.b2a_base64(b)
+ lines.append(a)
+ print a,
+res = ""
+for line in lines:
+ b = binascii.a2b_base64(line)
+ res = res + b
+assert res == testdata
+
+# Test base64 with random invalid characters sprinkled throughout
+# (This requires a new version of binascii.)
+fillers = ""
+valid = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+/"
+for i in range(256):
+ c = chr(i)
+ if c not in valid:
+ fillers = fillers + c
+def addnoise(line):
+ # Return line with the characters of the module-level string
+ # `fillers` (every character outside the base64 alphabet) mixed in.
+ # The characters of line keep their relative order.
+ noise = fillers
+ ratio = len(line) / len(noise)
+ res = ""
+ while line and noise:
+ # Take the next character from line while it is over-represented
+ # compared with the starting ratio, otherwise from noise.
+ if len(line) / len(noise) > ratio:
+ c, line = line[0], line[1:]
+ else:
+ c, noise = noise[0], noise[1:]
+ res = res + c
+ # At most one of the two is non-empty here; append what is left.
+ return res + noise + line
+res = ""
+for line in map(addnoise, lines):
+ b = binascii.a2b_base64(line)
+ res = res + b
+assert res == testdata
+
+# Test uu
+print "uu test"
+MAX_UU = 45
+lines = []
+for i in range(0, len(testdata), MAX_UU):
+ b = testdata[i:i+MAX_UU]
+ a = binascii.b2a_uu(b)
+ lines.append(a)
+ print a,
+res = ""
+for line in lines:
+ b = binascii.a2b_uu(line)
+ res = res + b
+assert res == testdata
+
+# Test crc32()
+crc = binascii.crc32("Test the CRC-32 of")
+crc = binascii.crc32(" this string.", crc)
+if crc != 1571220330:
+ print "binascii.crc32() failed."
-def test():
-
- try:
- fname1 = tempfile.mktemp()
- fname2 = tempfile.mktemp()
- f = open(fname1, 'w')
- except:
- raise ImportError, "Cannot test binascii without a temp file"
-
- start = 'Jack is my hero'
- f.write(start)
- f.close()
-
- binhex.binhex(fname1, fname2)
- if verbose:
- print 'binhex'
-
- binhex.hexbin(fname2, fname1)
- if verbose:
- print 'hexbin'
-
- f = open(fname1, 'r')
- finish = f.readline()
-
- if start <> finish:
- print 'Error: binhex <> hexbin'
- elif verbose:
- print 'binhex == hexbin'
-
- try:
- import os
- os.unlink(fname1)
- os.unlink(fname2)
- except:
- pass
-test()
+# The hqx test is in test_binhex.py
--- /dev/null
+from test_support import TestFailed
+
+class base_set:
+
+ def __init__(self, el):
+ self.el = el
+
+class set(base_set):
+
+ def __contains__(self, el):
+ return self.el == el
+
+class seq(base_set):
+
+ def __getitem__(self, n):
+ return [self.el][n]
+
+def check(ok, *args):
+ # Raise TestFailed unless ok is true. The message consists of the
+ # remaining arguments, each converted with str() and joined by spaces.
+ if not ok:
+ raise TestFailed, " ".join(map(str, args))
+
+a = base_set(1)
+b = set(1)
+c = seq(1)
+
+check(1 in b, "1 not in set(1)")
+check(0 not in b, "0 in set(1)")
+check(1 in c, "1 not in seq(1)")
+check(0 not in c, "0 in seq(1)")
+
+try:
+ 1 in a
+ check(0, "in base_set did not raise error")
+except AttributeError:
+ pass
+
+try:
+ 1 not in a
+ check(0, "not in base_set did not raise error")
+except AttributeError:
+ pass
+
+# Test char in string
+
+check('c' in 'abc', "'c' not in 'abc'")
+check('d' not in 'abc', "'d' in 'abc'")
+
+try:
+ '' in 'abc'
+ check(0, "'' in 'abc' did not raise error")
+except TypeError:
+ pass
+
+try:
+ 'ab' in 'abc'
+ check(0, "'ab' in 'abc' did not raise error")
+except TypeError:
+ pass
+
+try:
+ None in 'abc'
+ check(0, "None in 'abc' did not raise error")
+except TypeError:
+ pass
+
+# Test char in Unicode
+
+check('c' in u'abc', "'c' not in u'abc'")
+check('d' not in u'abc', "'d' in u'abc'")
+
+try:
+ '' in u'abc'
+ check(0, "'' in u'abc' did not raise error")
+except TypeError:
+ pass
+
+try:
+ 'ab' in u'abc'
+ check(0, "'ab' in u'abc' did not raise error")
+except TypeError:
+ pass
+
+try:
+ None in u'abc'
+ check(0, "None in u'abc' did not raise error")
+except TypeError:
+ pass
+
+# Test Unicode char in Unicode
+
+check(u'c' in u'abc', "u'c' not in u'abc'")
+check(u'd' not in u'abc', "u'd' in u'abc'")
+
+try:
+ u'' in u'abc'
+ check(0, "u'' in u'abc' did not raise error")
+except TypeError:
+ pass
+
+try:
+ u'ab' in u'abc'
+ check(0, "u'ab' in u'abc' did not raise error")
+except TypeError:
+ pass
+
+# Test Unicode char in string
+
+check(u'c' in 'abc', "u'c' not in 'abc'")
+check(u'd' not in 'abc', "u'd' in 'abc'")
+
+try:
+ u'' in 'abc'
+ check(0, "u'' in 'abc' did not raise error")
+except TypeError:
+ pass
+
+try:
+ u'ab' in 'abc'
+ check(0, "u'ab' in 'abc' did not raise error")
+except TypeError:
+ pass
+
+# A collection of tests on builtin sequence types
+a = range(10)
+for i in a:
+ check(i in a, "%s not in %s" % (`i`, `a`))
+check(16 not in a, "16 not in %s" % `a`)
+check(a not in a, "%s not in %s" % (`a`, `a`))
+
+a = tuple(a)
+for i in a:
+ check(i in a, "%s not in %s" % (`i`, `a`))
+check(16 not in a, "16 not in %s" % `a`)
+check(a not in a, "%s not in %s" % (`a`, `a`))
+
+class Deviant1:
+ """Behaves strangely when compared
+
+ This class is designed to make sure that the contains code
+ works when the list is modified during the check.
+ """
+
+ # Class-level list shared by all instances; __cmp__ shrinks it.
+ aList = range(15)
+
+ def __cmp__(self, other):
+ # Being compared against 12 removes the last three items from the
+ # shared list, i.e. the list shrinks while it is being searched.
+ if other == 12:
+ self.aList.remove(12)
+ self.aList.remove(13)
+ self.aList.remove(14)
+ # A non-zero result means "not equal" (presumably returned for every
+ # operand; the original indentation is not visible here).
+ return 1
+
+check(Deviant1() not in Deviant1.aList, "Deviant1 failed")
+
+class Deviant2:
+ """Behaves strangely when compared
+
+ This class raises an exception during comparison. That in
+ turn causes the comparison to fail with a TypeError.
+ """
+
+ def __cmp__(self, other):
+ # Only a comparison against 4 raises; no return statement is
+ # visible for any other operand.
+ if other == 4:
+ raise RuntimeError, "gotcha"
+
+try:
+ check(Deviant2() not in a, "oops")
+except TypeError:
+ pass
f.close()
try:
cPickle.dump(123, f)
- except IOError:
+ except ValueError:
pass
else:
- print "dump to closed file should raise IOError"
+ print "dump to closed file should raise ValueError"
f = open(fn, "r")
f.close()
try:
cPickle.load(f)
- except IOError:
+ except ValueError:
pass
else:
- print "load from closed file should raise IOError"
+ print "load from closed file should raise ValueError"
os.remove(fn)
+ # Test specific bad cases
+ for i in range(10):
+ try:
+ x = cPickle.loads('garyp')
+ except cPickle.BadPickleGet, y:
+ del y
+ else:
+ print "unexpected success!"
+ break
+
+
dotest()
--- /dev/null
+from UserList import UserList
+
+def f(*a, **k):
+ print a, k
+
+def g(x, *y, **z):
+ print x, y, z
+
+def h(j=1, a=2, h=3):
+ print j, a, h
+
+f()
+f(1)
+f(1, 2)
+f(1, 2, 3)
+
+f(1, 2, 3, *(4, 5))
+f(1, 2, 3, *[4, 5])
+f(1, 2, 3, *UserList([4, 5]))
+f(1, 2, 3, **{'a':4, 'b':5})
+f(1, 2, 3, *(4, 5), **{'a':6, 'b':7})
+f(1, 2, 3, x=4, y=5, *(6, 7), **{'a':8, 'b':9})
+
+try:
+ g()
+except TypeError, err:
+ print "TypeError:", err
+else:
+ print "should raise TypeError: not enough arguments; expected 1, got 0"
+
+try:
+ g(*())
+except TypeError, err:
+ print "TypeError:", err
+else:
+ print "should raise TypeError: not enough arguments; expected 1, got 0"
+
+try:
+ g(*(), **{})
+except TypeError, err:
+ print "TypeError:", err
+else:
+ print "should raise TypeError: not enough arguments; expected 1, got 0"
+
+g(1)
+g(1, 2)
+g(1, 2, 3)
+g(1, 2, 3, *(4, 5))
+class Nothing: pass
+try:
+ g(*Nothing())
+except AttributeError, attr:
+ pass
+else:
+ print "should raise AttributeError: __len__"
+
+class Nothing:
+ def __len__(self):
+ return 5
+try:
+ g(*Nothing())
+except AttributeError, attr:
+ pass
+else:
+ print "should raise AttributeError: __getitem__"
+
+class Nothing:
+ def __len__(self):
+ return 5
+ def __getitem__(self, i):
+ if i < 3:
+ return i
+ else:
+ raise IndexError, i
+g(*Nothing())
+
+# make sure the function call doesn't stomp on the dictionary?
+d = {'a': 1, 'b': 2, 'c': 3}
+d2 = d.copy()
+assert d == d2
+g(1, d=4, **d)
+print d
+print d2
+assert d == d2, "function call modified dictionary"
+
+# what about willful misconduct?
+def saboteur(**kw):
+ kw['x'] = locals()
+d = {}
+saboteur(a=1, **d)
+assert d == {}
+
+try:
+ g(1, 2, 3, **{'x':4, 'y':5})
+except TypeError, err:
+ print err
+else:
+ print "should raise TypeError: keyword parameter redefined"
+
+try:
+ g(1, 2, 3, a=4, b=5, *(6, 7), **{'a':8, 'b':9})
+except TypeError, err:
+ print err
+else:
+ print "should raise TypeError: keyword parameter redefined"
+
+try:
+ f(**{1:2})
+except TypeError, err:
+ print err
+else:
+ print "should raise TypeError: keywords must be strings"
+
+try:
+ h(**{'e': 2})
+except TypeError, err:
+ print err
+else:
+ print "should raise TypeError: unexpected keyword argument: e"
+
+try:
+ h(*h)
+except TypeError, err:
+ print err
+else:
+ print "should raise TypeError: * argument must be a tuple"
+
+try:
+ h(**h)
+except TypeError, err:
+ print err
+else:
+ print "should raise TypeError: ** argument must be a dictionary"
+
+def f2(*a, **b):
+ return a, b
+
+d = {}
+for i in range(512):
+ key = 'k%d' % i
+ d[key] = i
+a, b = f2(1, *(2, 3), **d)
+print len(a), len(b), b == d
if sys.platform in ('netbsd1',
'freebsd2', 'freebsd3',
- 'bsdos2', 'bsdos3', 'bsdos4'):
+ 'bsdos2', 'bsdos3', 'bsdos4',
+ 'openbsd', 'openbsd2'):
lockdata = struct.pack('lxxxxlxxxxlhh', 0, 0, 0, FCNTL.F_WRLCK, 0)
elif sys.platform in ['aix3', 'aix4']:
lockdata = struct.pack('hhlllii', FCNTL.F_WRLCK, 0, 0, 0, 0, 0, 0)
--- /dev/null
+"""This test checks for correct fork() behavior.
+
+We want fork1() semantics -- only the forking thread survives in the
+child after a fork().
+
+On some systems (e.g. Solaris without posix threads) we find that all
+active threads survive in the child after a fork(); this is an error.
+
+"""
+
+import os, sys, time, thread
+
+try:
+ os.fork
+except AttributeError:
+ raise ImportError, "os.fork not defined -- skipping test_fork1"
+
+LONGSLEEP = 2
+
+SHORTSLEEP = 0.5
+
+NUM_THREADS = 4
+
+alive = {}
+
+stop = 0
+
+def f(id):
+ # Thread body: until the global stop flag is set, keep recording in
+ # alive[id] the pid of the process this thread is running in.
+ while not stop:
+ alive[id] = os.getpid()
+ try:
+ time.sleep(SHORTSLEEP)
+ except IOError:
+ # An IOError from sleep() is ignored and the loop carries on.
+ pass
+
+def main():
+ # Start NUM_THREADS worker threads, fork, and let the child report
+ # through its exit status how many workers are still updating `alive`
+ # after the fork. The parent asserts that this status is 0.
+ for i in range(NUM_THREADS):
+ thread.start_new(f, (i,))
+
+ # Give every thread time to register itself in `alive`.
+ time.sleep(LONGSLEEP)
+
+ a = alive.keys()
+ a.sort()
+ assert a == range(NUM_THREADS)
+
+ # Snapshot of the pids recorded before the fork (all the parent's pid).
+ prefork_lives = alive.copy()
+
+ cpid = os.fork()
+
+ if cpid == 0:
+ # Child
+ time.sleep(LONGSLEEP)
+ # A changed entry means a worker thread ran in the child and stored
+ # the child's pid; count those and use the count as exit status.
+ n = 0
+ for key in alive.keys():
+ if alive[key] != prefork_lives[key]:
+ n = n+1
+ os._exit(n)
+ else:
+ # Parent
+ spid, status = os.waitpid(cpid, 0)
+ assert spid == cpid
+ assert status == 0, "cause = %d, exit = %d" % (status&0xff, status>>8)
+ global stop
+ # Tell threads to die
+ stop = 1
+ time.sleep(2*SHORTSLEEP) # Wait for threads to die
+
+main()
import gdbm
from gdbm import error
-from test_support import verbose
+from test_support import verbose, TestFailed
filename= '/tmp/delete_me'
g.has_key('a')
g.close()
+try:
+ g['a']
+except error:
+ pass
+else:
+ raise TestFailed, "expected gdbm.error accessing closed database"
g = gdbm.open(filename, 'r')
g.close()
g = gdbm.open(filename, 'rw')
print 'funcdef'
### 'def' NAME parameters ':' suite
### parameters: '(' [varargslist] ')'
-### varargslist: (fpdef ['=' test] ',')* '*' NAME
-### | fpdef ['=' test] (',' fpdef ['=' test])* [',']
+### varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' ('**'|'*' '*') NAME]
+### | ('**'|'*' '*') NAME)
+### | fpdef ['=' test] (',' fpdef ['=' test])* [',']
### fpdef: NAME | '(' fplist ')'
### fplist: fpdef (',' fpdef)* [',']
+### arglist: (argument ',')* (argument | '*' test [',' '**' test] | '**' test)
+### argument: [test '='] test # Really [keyword '='] test
def f1(): pass
+f1()
+f1(*())
+f1(*(), **{})
def f2(one_argument): pass
def f3(two, arguments): pass
def f4(two, (compound, (argument, list))): pass
def d01(a=1): pass
d01()
d01(1)
+d01(*(1,))
+d01(**{'a':2})
def d11(a, b=1): pass
d11(1)
d11(1, 2)
+d11(1, **{'b':2})
def d21(a, b, c=1): pass
d21(1, 2)
d21(1, 2, 3)
+d21(*(1, 2, 3))
+d21(1, *(2, 3))
+d21(1, 2, *(3,))
+d21(1, 2, **{'c':3})
def d02(a=1, b=2): pass
d02()
d02(1)
d02(1, 2)
+d02(*(1, 2))
+d02(1, *(2,))
+d02(1, **{'b':2})
+d02(**{'a': 1, 'b': 2})
def d12(a, b=1, c=2): pass
d12(1)
d12(1, 2)
d01v()
d01v(1)
d01v(1, 2)
+d01v(*(1, 2, 3, 4))
+d01v(*(1,))
+d01v(**{'a':2})
def d11v(a, b=1, *rest): pass
d11v(1)
d11v(1, 2)
d21v(1, 2)
d21v(1, 2, 3)
d21v(1, 2, 3, 4)
+d21v(*(1, 2, 3, 4))
+d21v(1, 2, **{'c': 3})
def d02v(a=1, b=2, *rest): pass
d02v()
d02v(1)
d02v(1, 2)
d02v(1, 2, 3)
+d02v(1, *(2, 3, 4))
+d02v(**{'a': 1, 'b': 2})
def d12v(a, b=1, c=2, *rest): pass
d12v(1)
d12v(1, 2)
d12v(1, 2, 3)
d12v(1, 2, 3, 4)
+d12v(*(1, 2, 3, 4))
+d12v(1, 2, *(3, 4, 5))
+d12v(1, *(2,), **{'c': 3})
def d22v(a, b, c=1, d=2, *rest): pass
d22v(1, 2)
d22v(1, 2, 3)
d22v(1, 2, 3, 4)
d22v(1, 2, 3, 4, 5)
+d22v(*(1, 2, 3, 4))
+d22v(1, 2, *(3, 4, 5))
+d22v(1, *(2, 3), **{'d': 4})
### stmt: simple_stmt | compound_stmt
# Tested below
v3(1,(2,3))
v3(1,(2,3),4)
v3(1,(2,3),4,5,6,7,8,9,0)
+print
import sys, time
c = sys.path[0]
x = time.time()
def test_division_2(x, y):
q, r = divmod(x, y)
q2, r2 = x/y, x%y
+ pab, pba = x*y, y*x
+ check(pab == pba, "multiplication does not commute for", x, y)
check(q == q2, "divmod returns different quotient than / for", x, y)
check(r == r2, "divmod returns different mod than % for", x, y)
check(x == q*y + r, "x != q*y + r after divmod on", x, y)
test_bitop_identities_2(x, y)
test_bitop_identities_3(x, y, getran((lenx + leny)/2))
-# ------------------------------------------------------ hex oct str atol
+# ------------------------------------------------- hex oct repr str atol
def slow_format(x, base):
if (x, base) == (0, 8):
def test_format_1(x):
from string import atol
- for base, mapper in (8, oct), (10, str), (16, hex):
+ for base, mapper in (8, oct), (10, repr), (16, hex):
got = mapper(x)
expected = slow_format(x, base)
check(got == expected, mapper.__name__, "returned",
got, "but expected", expected, "for", x)
check(atol(got, 0) == x, 'atol("%s", 0) !=' % got, x)
+ # str() has to be checked a little differently since there's no
+ # trailing "L"
+ got = str(x)
+ expected = slow_format(x, 10)[:-1]
+ check(got == expected, mapper.__name__, "returned",
+ got, "but expected", expected, "for", x)
def test_format(maxdigits=MAXDIGITS):
print "long str/hex/oct/atol"
--- /dev/null
+
+import mmap
+import string, os, re, sys
+
+PAGESIZE = mmap.PAGESIZE
+
+def test_both():
+    """Test mmap module on Unix systems and Windows
+
+    Writes two pages to the file 'foo' in the current directory, maps
+    them, and checks find/len/indexing/slicing, item and slice
+    assignment, flush() and a regular expression search on the map.
+    The file is removed again at the end of a successful run.
+    """
+
+    # Create an mmap'ed file
+    f = open('foo', 'w+')
+
+    # Write 2 pages worth of data to the file
+    f.write('\0'* PAGESIZE)
+    f.write('foo')
+    f.write('\0'* (PAGESIZE-3) )
+
+    # The writes above may still sit in the file object's buffer; push
+    # them out so the file really is two pages long before it is mapped
+    # through its raw file descriptor.
+    f.flush()
+
+    m = mmap.mmap(f.fileno(), 2 * PAGESIZE)
+    f.close()
+
+    # Simple sanity checks
+    print ' Position of foo:', string.find(m, 'foo') / float(PAGESIZE), 'pages'
+    assert string.find(m, 'foo') == PAGESIZE
+
+    print ' Length of file:', len(m) / float(PAGESIZE), 'pages'
+    assert len(m) == 2*PAGESIZE
+
+    print ' Contents of byte 0:', repr(m[0])
+    assert m[0] == '\0'
+    print ' Contents of first 3 bytes:', repr(m[0:3])
+    assert m[0:3] == '\0\0\0'
+
+    # Modify the file's content
+    print "\n Modifying file's content..."
+    m[0] = '3'
+    m[PAGESIZE +3: PAGESIZE +3+3]='bar'
+
+    # Check that the modification worked
+    print ' Contents of byte 0:', repr(m[0])
+    assert m[0] == '3'
+    print ' Contents of first 3 bytes:', repr(m[0:3])
+    assert m[0:3] == '3\0\0'
+    print ' Contents of second page:', m[PAGESIZE-1 : PAGESIZE + 7]
+    assert m[PAGESIZE-1 : PAGESIZE + 7] == '\0foobar\0'
+
+    m.flush()
+
+    # Test doing a regular expression match in an mmap'ed file
+    match=re.search('[A-Za-z]+', m)
+    if match == None:
+        print ' ERROR: regex match on mmap failed!'
+    else:
+        start, end = match.span(0)
+        length = end - start
+
+        print ' Regex match on mmap (page start, length of match):',
+        print start / float(PAGESIZE), length
+
+        # The only letters in the file are the six of 'foobar' at the
+        # start of the second page.
+        assert start == PAGESIZE
+        assert end == PAGESIZE + 6
+
+    m.close()
+    os.unlink("foo")
+    print ' Test passed'
+
+test_both()
+
--- /dev/null
+# Very simple test - Parse a file and print what happens
+
+# XXX TypeErrors on calling handlers, or on bad return values from a
+# handler, are obscure and unhelpful.
+
+import sys, string
+import os
+
+import pyexpat
+
+class Outputter:
+ # Collection of pyexpat callback methods. Most of them just print
+ # the event they were called for together with their arguments;
+ # the two Default* handlers do nothing.
+ def StartElementHandler(self, name, attrs):
+ print 'Start element:\n\t', name, attrs
+
+ def EndElementHandler(self, name):
+ print 'End element:\n\t', name
+
+ def CharacterDataHandler(self, data):
+ # Whitespace-only character data is not reported.
+ data = string.strip(data)
+ if data:
+ print 'Character data:'
+ print '\t', repr(data)
+
+ def ProcessingInstructionHandler(self, target, data):
+ print 'PI:\n\t', target, data
+
+ def StartNamespaceDeclHandler(self, prefix, uri):
+ print 'NS decl:\n\t', prefix, uri
+
+ def EndNamespaceDeclHandler(self, prefix):
+ print 'End of NS decl:\n\t', prefix
+
+ def StartCdataSectionHandler(self):
+ print 'Start of CDATA section'
+
+ def EndCdataSectionHandler(self):
+ print 'End of CDATA section'
+
+ def CommentHandler(self, text):
+ print 'Comment:\n\t', repr(text)
+
+ def NotationDeclHandler(self, *args):
+ # The unpacking fails unless exactly four arguments are passed.
+ name, base, sysid, pubid = args
+ print 'Notation declared:', args
+
+ def UnparsedEntityDeclHandler(self, *args):
+ # The unpacking fails unless exactly five arguments are passed.
+ entityName, base, systemId, publicId, notationName = args
+ print 'Unparsed entity decl:\n\t', args
+
+ def NotStandaloneHandler(self, userData):
+ # Not installed on the parser: its name is commented out in the
+ # list of handler names used further down in this file.
+ print 'Not standalone'
+ return 1
+
+ def ExternalEntityRefHandler(self, context, base, sysId, pubId):
+ print 'External entity ref:', context, base, sysId, pubId
+ return 1
+
+ def DefaultHandler(self, userData):
+ pass
+
+ def DefaultHandlerExpand(self, userData):
+ pass
+
+
+out = Outputter()
+parser = pyexpat.ParserCreate(namespace_separator='!')
+for name in ['StartElementHandler', 'EndElementHandler',
+ 'CharacterDataHandler', 'ProcessingInstructionHandler',
+ 'UnparsedEntityDeclHandler', 'NotationDeclHandler',
+ 'StartNamespaceDeclHandler', 'EndNamespaceDeclHandler',
+ 'CommentHandler', 'StartCdataSectionHandler',
+ 'EndCdataSectionHandler',
+ 'DefaultHandler', 'DefaultHandlerExpand',
+ #'NotStandaloneHandler',
+ 'ExternalEntityRefHandler'
+ ]:
+ setattr(parser, name, getattr(out, name) )
+
+data = """<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
+<?xml-stylesheet href="stylesheet.css"?>
+<!-- comment data -->
+<!DOCTYPE quotations SYSTEM "quotations.dtd" [
+<!ELEMENT root ANY>
+<!NOTATION notation SYSTEM "notation.jpeg">
+<!ENTITY acirc "â">
+<!ENTITY external_entity SYSTEM "entity.file">
+<!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation>
+%unparsed_entity;
+]>
+
+<root>
+<myns:subelement xmlns:myns="http://www.python.org/namespace">
+ Contents of subelements
+</myns:subelement>
+<sub2><![CDATA[contents of CDATA section]]></sub2>
+&external_entity;
+</root>
+"""
+
+try:
+ parser.Parse(data, 1)
+except pyexpat.error:
+ print '** Error', parser.ErrorCode, pyexpat.ErrorString( parser.ErrorCode)
+ print '** Line', parser.ErrorLineNumber
+ print '** Column', parser.ErrorColumnNumber
+ print '** Byte', parser.ErrorByteIndex
+
fp.seek(0)
m = rfc822.Message(fp)
i = 0
+
for n, a in m.getaddrlist('to') + m.getaddrlist('cc'):
if verbose:
print 'name:', repr(n), 'addr:', repr(a)
print ' [no match]'
print 'not found:', repr(n), repr(a)
+ out = m.getdate('date')
+ if out:
+ if verbose:
+ print 'Date:', m.getheader('date')
+ if out == (1999, 1, 13, 23, 57, 35, 0, 0, 0):
+ if verbose:
+ print ' [matched]'
+ else:
+ if verbose:
+ print ' [no match]'
+ print 'Date conversion failed:', out
+
+# Note: all test cases must have the same date (in various formats),
+# or no date!
+
test('''Date: Wed, 13 Jan 1999 23:57:35 -0500
From: Guido van Rossum <guido@CNRI.Reston.VA.US>
To: "Guido van
test('''From: Barry <bwarsaw@python.org
To: guido@python.org (Guido: the Barbarian)
Subject: nonsense
+Date: Wednesday, January 13 1999 23:57:35 -0500
test''', [('Guido: the Barbarian', 'guido@python.org'),
])
test('''From: Barry <bwarsaw@python.org
To: guido@python.org (Guido: the Barbarian)
Cc: "Guido: the Madman" <guido@python.org>
+Date: 13-Jan-1999 23:57:35 EST
test''', [('Guido: the Barbarian', 'guido@python.org'),
('Guido: the Madman', 'guido@python.org')
test('''To: "The monster with
the very long name: Guido" <guido@python.org>
+Date: Wed, 13 Jan 1999 23:57:35 -0500
test''', [('The monster with\n the very long name: Guido',
'guido@python.org')])
"'string-sig@python.org'" <string-sig@python.org>
Cc: fooz@bat.com, bart@toof.com
Cc: goit@lip.com
+Date: Wed, 13 Jan 1999 23:57:35 -0500
test''', [('Amit J. Patel', 'amitp@Theory.Stanford.EDU'),
('Mike Fletcher', 'mfletch@vrtelecom.com'),
# This one is just twisted. I don't know what the proper result should be,
# but it shouldn't be to infloop, which is what used to happen!
test('''To: <[smtp:dd47@mail.xxx.edu]_at_hmhq@hdq-mdm1-imgout.companay.com>
+Date: Wed, 13 Jan 1999 23:57:35 -0500
test''', [('', ''),
('', 'dd47@mail.xxx.edu'),
('', '_at_hmhq@hdq-mdm1-imgout.companay.com')
])
+
+# This exercises the old commas-in-a-full-name bug, which should be doing the
+# right thing in recent versions of the module.
+test('''To: "last, first" <userid@foo.net>
+
+test''', [('last, first', 'userid@foo.net'),
+ ])
+
+test('''To: (Comment stuff) "Quoted name"@somewhere.com
+
+test''', [('Comment stuff', '"Quoted name"@somewhere.com'),
+ ])
+
+test('''To: :
+Cc: goit@lip.com
+Date: Wed, 13 Jan 1999 23:57:35 -0500
+
+test''', [('', 'goit@lip.com')])
+
if not canfork or os.fork():
# parent is server
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
- s.bind(hostname, PORT)
+ s.bind((hostname, PORT))
s.listen(1)
if verbose:
print 'parent accepting'
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
if verbose:
print 'child connecting'
- s.connect(hostname, PORT)
+ s.connect((hostname, PORT))
msg = 'socket test'
s.send(msg)
data = s.recv(1024)
if len([1,2,3,4,5,6]) <> 6: raise TestFailed, 'len([1,2,3,4,5,6])'
if [1,2]+[3,4] <> [1,2,3,4]: raise TestFailed, 'list concatenation'
if [1,2]*3 <> [1,2,1,2,1,2]: raise TestFailed, 'list repetition *3'
+if [1,2]*3L <> [1,2,1,2,1,2]: raise TestFailed, 'list repetition *3L'
if 0*[1,2,3] <> []: raise TestFailed, 'list repetition 0*'
+if 0L*[1,2,3] <> []: raise TestFailed, 'list repetition 0L*'
if min([1,2]) <> 1 or max([1,2]) <> 2: raise TestFailed, 'min/max list'
if 0 in [0,1,2] and 1 in [0,1,2] and 2 in [0,1,2] and 3 not in [0,1,2]: pass
else: raise TestFailed, 'in/not in list'
print '6.5.3a Additional list operations'
a = [0,1,2,3,4]
+a[0L] = 1
+a[1L] = 2
+a[2L] = 3
+if a <> [1,2,3,3,4]: raise TestFailed, 'list item assignment [0L], [1L], [2L]'
a[0] = 5
a[1] = 6
a[2] = 7
if a <> [5,6,7,3,4]: raise TestFailed, 'list item assignment [0], [1], [2]'
+a[-2L] = 88
+a[-1L] = 99
+if a <> [5,6,7,88,99]: raise TestFailed, 'list item assignment [-2L], [-1L]'
a[-2] = 8
a[-1] = 9
if a <> [5,6,7,8,9]: raise TestFailed, 'list item assignment [-2], [-1]'
a[-3:] = []
a[1:1] = [1,2,3]
if a <> [0,1,2,3,4]: raise TestFailed, 'list slice assignment'
+a[ 1L : 4L] = [7,8,9]
+if a <> [0,7,8,9,4]: raise TestFailed, 'list slice assignment using long ints'
del a[1:4]
if a <> [0,4]: raise TestFailed, 'list slice deletion'
del a[0]
if a <> [4]: raise TestFailed, 'list item deletion [0]'
del a[-1]
if a <> []: raise TestFailed, 'list item deletion [-1]'
+a=range(0,5)
+del a[1L:4L]
+if a <> [0,4]: raise TestFailed, 'list slice deletion'
+del a[0L]
+if a <> [4]: raise TestFailed, 'list item deletion [0]'
+del a[-1L]
+if a <> []: raise TestFailed, 'list item deletion [-1]'
a.append(0)
a.append(1)
a.append(2)
z = range(12)
z.sort(myComparison)
+# Test extreme cases with long ints
+a = [0,1,2,3,4]
+if a[ -pow(2,128L): 3 ] != [0,1,2]:
+ raise TestFailed, "list slicing with too-small long integer"
+if a[ 3: pow(2,145L) ] != [3,4]:
+ raise TestFailed, "list slicing with too-large long integer"
+
print '6.6 Mappings == Dictionaries'
d = {}
if d.keys() <> []: raise TestFailed, '{}.keys()'
--- /dev/null
+""" Test script for the Unicode implementation.
+
+Written by Marc-Andre Lemburg (mal@lemburg.com).
+
+(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+
+"""
+from test_support import verbose
+import sys
+
+def test(method, input, output, *args):
+    """Call input.<method>(*args) and compare the result with output.
+
+    If the lookup or the call raises, the exception class takes the
+    place of the result, so tests may pass an exception class as the
+    expected output.  A mismatch is reported on a line starting with
+    '*'; when an exception was caught its class and value are printed
+    as well.  'yes'/'no' progress output is only printed when verbose
+    is true.
+    """
+    if verbose:
+        print '%s.%s%s =? %s... ' % (repr(input), method, args, output),
+    # Bind f up front: if getattr() itself raises, the failure report
+    # below must still be able to print it instead of dying with a
+    # NameError that hides the real problem.
+    f = None
+    try:
+        f = getattr(input, method)
+        value = apply(f, args)
+    except:
+        # Deliberately catches everything: the exception class is the
+        # value that gets compared with the expected output.
+        exc = sys.exc_info()[:2]
+        value = exc[0]
+    else:
+        exc = None
+    if value != output:
+        if verbose:
+            print 'no'
+        print '*',f, `input`, `output`, `value`
+        if exc:
+            print ' value == %s: %s' % (exc)
+    else:
+        if verbose:
+            print 'yes'
+
+test('capitalize', u' hello ', u' hello ')
+test('capitalize', u'hello ', u'Hello ')
+
+test('title', u' hello ', u' Hello ')
+test('title', u'hello ', u'Hello ')
+test('title', u"fOrMaT thIs aS titLe String", u'Format This As Title String')
+test('title', u"fOrMaT,thIs-aS*titLe;String", u'Format,This-As*Title;String')
+test('title', u"getInt", u'Getint')
+
+test('find', u'abcdefghiabc', 0, u'abc')
+test('find', u'abcdefghiabc', 9, u'abc', 1)
+test('find', u'abcdefghiabc', -1, u'def', 4)
+
+test('rfind', u'abcdefghiabc', 9, u'abc')
+
+test('lower', u'HeLLo', u'hello')
+test('lower', u'hello', u'hello')
+
+test('upper', u'HeLLo', u'HELLO')
+test('upper', u'HELLO', u'HELLO')
+
+if 0:
+ transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'
+
+ test('maketrans', u'abc', transtable, u'xyz')
+ test('maketrans', u'abc', ValueError, u'xyzq')
+
+test('split', u'this is the split function',
+ [u'this', u'is', u'the', u'split', u'function'])
+test('split', u'a|b|c|d', [u'a', u'b', u'c', u'd'], u'|')
+test('split', u'a|b|c|d', [u'a', u'b', u'c|d'], u'|', 2)
+test('split', u'a b c d', [u'a', u'b c d'], None, 1)
+test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
+test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 3)
+test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 4)
+test('split', u'a b c d', [u'a b c d'], None, 0)
+test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
+test('split', u'a b c d ', [u'a', u'b', u'c', u'd'])
+
+# join now works with any sequence type
+# Minimal user-defined sequence: it implements only __len__ and
+# __getitem__, delegating both to self.seq (the string 'wxyz').
+class Sequence:
+ def __init__(self): self.seq = 'wxyz'
+ def __len__(self): return len(self.seq)
+ def __getitem__(self, i): return self.seq[i]
+
+test('join', u' ', u'a b c d', [u'a', u'b', u'c', u'd'])
+test('join', u'', u'abcd', (u'a', u'b', u'c', u'd'))
+test('join', u' ', u'w x y z', Sequence())
+test('join', u' ', TypeError, 7)
+
+# A Sequence whose items are not all strings: an int, a unicode string
+# and a long.
+class BadSeq(Sequence):
+ def __init__(self): self.seq = [7, u'hello', 123L]
+
+test('join', u' ', TypeError, BadSeq())
+
+result = u''
+for i in range(10):
+ if i > 0:
+ result = result + u':'
+ result = result + u'x'*10
+test('join', u':', result, [u'x' * 10] * 10)
+test('join', u':', result, (u'x' * 10,) * 10)
+
+test('strip', u' hello ', u'hello')
+test('lstrip', u' hello ', u'hello ')
+test('rstrip', u' hello ', u' hello')
+test('strip', u'hello', u'hello')
+
+test('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS')
+
+if 0:
+ test('translate', u'xyzabcdef', u'xyzxyz', transtable, u'def')
+
+ table = string.maketrans('a', u'A')
+ test('translate', u'abc', u'Abc', table)
+ test('translate', u'xyz', u'xyz', table)
+
+test('replace', u'one!two!three!', u'one@two!three!', u'!', u'@', 1)
+test('replace', u'one!two!three!', u'onetwothree', '!', '')
+test('replace', u'one!two!three!', u'one@two@three!', u'!', u'@', 2)
+test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 3)
+test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 4)
+test('replace', u'one!two!three!', u'one!two!three!', u'!', u'@', 0)
+test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@')
+test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@')
+test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@', 2)
+
+test('startswith', u'hello', 1, u'he')
+test('startswith', u'hello', 1, u'hello')
+test('startswith', u'hello', 0, u'hello world')
+test('startswith', u'hello', 1, u'')
+test('startswith', u'hello', 0, u'ello')
+test('startswith', u'hello', 1, u'ello', 1)
+test('startswith', u'hello', 1, u'o', 4)
+test('startswith', u'hello', 0, u'o', 5)
+test('startswith', u'hello', 1, u'', 5)
+test('startswith', u'hello', 0, u'lo', 6)
+test('startswith', u'helloworld', 1, u'lowo', 3)
+test('startswith', u'helloworld', 1, u'lowo', 3, 7)
+test('startswith', u'helloworld', 0, u'lowo', 3, 6)
+
+test('endswith', u'hello', 1, u'lo')
+test('endswith', u'hello', 0, u'he')
+test('endswith', u'hello', 1, u'')
+test('endswith', u'hello', 0, u'hello world')
+test('endswith', u'helloworld', 0, u'worl')
+test('endswith', u'helloworld', 1, u'worl', 3, 9)
+test('endswith', u'helloworld', 1, u'world', 3, 12)
+test('endswith', u'helloworld', 1, u'lowo', 1, 7)
+test('endswith', u'helloworld', 1, u'lowo', 2, 7)
+test('endswith', u'helloworld', 1, u'lowo', 3, 7)
+test('endswith', u'helloworld', 0, u'lowo', 4, 7)
+test('endswith', u'helloworld', 0, u'lowo', 3, 8)
+test('endswith', u'ab', 0, u'ab', 0, 1)
+test('endswith', u'ab', 0, u'ab', 0, 0)
+
+test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab def\ng hi')
+test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab def\ng hi', 8)
+test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab def\ng hi', 4)
+test('expandtabs', u'abc\r\nab\tdef\ng\thi', u'abc\r\nab def\ng hi', 4)
+
+if 0:
+ test('capwords', u'abc def ghi', u'Abc Def Ghi')
+ test('capwords', u'abc\tdef\nghi', u'Abc Def Ghi')
+ test('capwords', u'abc\t def \nghi', u'Abc Def Ghi')
+
+# Comparisons:
+print 'Testing Unicode comparisons...',
+assert u'abc' == 'abc'
+assert 'abc' == u'abc'
+assert u'abc' == u'abc'
+assert u'abcd' > 'abc'
+assert 'abcd' > u'abc'
+assert u'abcd' > u'abc'
+assert u'abc' < 'abcd'
+assert 'abc' < u'abcd'
+assert u'abc' < u'abcd'
+print 'done.'
+
+test('ljust', u'abc', u'abc ', 10)
+test('rjust', u'abc', u' abc', 10)
+test('center', u'abc', u' abc ', 10)
+test('ljust', u'abc', u'abc ', 6)
+test('rjust', u'abc', u' abc', 6)
+test('center', u'abc', u' abc ', 6)
+test('ljust', u'abc', u'abc', 2)
+test('rjust', u'abc', u'abc', 2)
+test('center', u'abc', u'abc', 2)
+
+test('islower', u'a', 1)
+test('islower', u'A', 0)
+test('islower', u'\n', 0)
+test('islower', u'\u1FFc', 0)
+test('islower', u'abc', 1)
+test('islower', u'aBc', 0)
+test('islower', u'abc\n', 1)
+
+test('isupper', u'a', 0)
+test('isupper', u'A', 1)
+test('isupper', u'\n', 0)
+test('isupper', u'\u1FFc', 0)
+test('isupper', u'ABC', 1)
+test('isupper', u'AbC', 0)
+test('isupper', u'ABC\n', 1)
+
+test('istitle', u'a', 0)
+test('istitle', u'A', 1)
+test('istitle', u'\n', 0)
+test('istitle', u'\u1FFc', 1)
+test('istitle', u'A Titlecased Line', 1)
+test('istitle', u'A\nTitlecased Line', 1)
+test('istitle', u'A Titlecased, Line', 1)
+test('istitle', u'Greek \u1FFcitlecases ...', 1)
+test('istitle', u'Not a capitalized String', 0)
+test('istitle', u'Not\ta Titlecase String', 0)
+test('istitle', u'Not--a Titlecase String', 0)
+
+test('splitlines', u"abc\ndef\n\rghi", [u'abc', u'def', u'', u'ghi'])
+test('splitlines', u"abc\ndef\n\r\nghi", [u'abc', u'def', u'', u'ghi'])
+test('splitlines', u"abc\ndef\r\nghi", [u'abc', u'def', u'ghi'])
+test('splitlines', u"abc\ndef\r\nghi\n", [u'abc', u'def', u'ghi'])
+test('splitlines', u"abc\ndef\r\nghi\n\r", [u'abc', u'def', u'ghi', u''])
+test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'', u'abc', u'def', u'ghi', u''])
+test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'\n', u'abc\n', u'def\r\n', u'ghi\n', u'\r'], 1)
+
+test('translate', u"abababc", u'bbbc', {ord('a'):None})
+test('translate', u"abababc", u'iiic', {ord('a'):None, ord('b'):ord('i')})
+test('translate', u"abababc", u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'})
+
+# Contains:
+print 'Testing Unicode contains method...',
+assert ('a' in u'abdb') == 1
+assert ('a' in u'bdab') == 1
+assert ('a' in u'bdaba') == 1
+assert ('a' in u'bdba') == 1
+assert ('a' in u'bdba') == 1
+assert (u'a' in u'bdba') == 1
+assert (u'a' in u'bdb') == 0
+assert (u'a' in 'bdb') == 0
+assert (u'a' in 'bdba') == 1
+assert (u'a' in ('a',1,None)) == 1
+assert (u'a' in (1,None,'a')) == 1
+assert (u'a' in (1,None,u'a')) == 1
+assert ('a' in ('a',1,None)) == 1
+assert ('a' in (1,None,'a')) == 1
+assert ('a' in (1,None,u'a')) == 1
+assert ('a' in ('x',1,u'y')) == 0
+assert ('a' in ('x',1,None)) == 0
+print 'done.'
+
+# Formatting:
+print 'Testing Unicode formatting strings...',
+assert u"%s, %s" % (u"abc", "abc") == u'abc, abc'
+assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3) == u'abc, abc, 1, 2.000000, 3.00'
+assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3) == u'abc, abc, 1, -2.000000, 3.00'
+assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5) == u'abc, abc, -1, -2.000000, 3.50'
+assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57) == u'abc, abc, -1, -2.000000, 3.57'
+assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57) == u'abc, abc, -1, -2.000000, 1003.57'
+assert u"%c" % (u"abc",) == u'a'
+assert u"%c" % ("abc",) == u'a'
+assert u"%c" % (34,) == u'"'
+assert u"%c" % (36,) == u'$'
+assert u"%r, %r" % (u"abc", "abc") == u"u'abc', 'abc'"
+assert u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def'
+assert u"%(x)s, %(ä)s" % {'x':u"abc", u'ä'.encode('utf-8'):"def"} == u'abc, def'
+# formatting jobs delegated from the string implementation:
+assert '...%(foo)s...' % {'foo':u"abc"} == u'...abc...'
+assert '...%(foo)s...' % {'foo':"abc"} == '...abc...'
+assert '...%(foo)s...' % {u'foo':"abc"} == '...abc...'
+assert '...%(foo)s...' % {u'foo':u"abc"} == u'...abc...'
+assert '...%(foo)s...' % {u'foo':u"abc",'def':123} == u'...abc...'
+assert '...%(foo)s...' % {u'foo':u"abc",u'def':123} == u'...abc...'
+assert '...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...1...2...3...abc...'
+assert '...%s...' % u"abc" == u'...abc...'
+print 'done.'
+
+# Test builtin codecs
+print 'Testing builtin codecs...',
+
+assert unicode('hello','ascii') == u'hello'
+assert unicode('hello','utf-8') == u'hello'
+assert unicode('hello','utf8') == u'hello'
+assert unicode('hello','latin-1') == u'hello'
+
+try:
+ u'Andr\202 x'.encode('ascii')
+ u'Andr\202 x'.encode('ascii','strict')
+except ValueError:
+ pass
+else:
+ raise AssertionError, "u'Andr\202'.encode('ascii') failed to raise an exception"
+assert u'Andr\202 x'.encode('ascii','ignore') == "Andr x"
+assert u'Andr\202 x'.encode('ascii','replace') == "Andr? x"
+
+try:
+ unicode('Andr\202 x','ascii')
+ unicode('Andr\202 x','ascii','strict')
+except ValueError:
+ pass
+else:
+ raise AssertionError, "unicode('Andr\202') failed to raise an exception"
+assert unicode('Andr\202 x','ascii','ignore') == u"Andr x"
+assert unicode('Andr\202 x','ascii','replace') == u'Andr\uFFFD x'
+
+assert u'hello'.encode('ascii') == 'hello'
+assert u'hello'.encode('utf-8') == 'hello'
+assert u'hello'.encode('utf8') == 'hello'
+assert u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000'
+assert u'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o'
+assert u'hello'.encode('latin-1') == 'hello'
+
+u = u''.join(map(unichr, range(1024)))
+for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
+ 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
+ assert unicode(u.encode(encoding),encoding) == u
+
+u = u''.join(map(unichr, range(256)))
+for encoding in (
+ 'latin-1',
+ ):
+ try:
+ assert unicode(u.encode(encoding),encoding) == u
+ except AssertionError:
+ print '*** codec "%s" failed round-trip' % encoding
+ except ValueError,why:
+ print '*** codec for "%s" failed: %s' % (encoding, why)
+
+u = u''.join(map(unichr, range(128)))
+for encoding in (
+ 'ascii',
+ ):
+ try:
+ assert unicode(u.encode(encoding),encoding) == u
+ except AssertionError:
+ print '*** codec "%s" failed round-trip' % encoding
+ except ValueError,why:
+ print '*** codec for "%s" failed: %s' % (encoding, why)
+
+print 'done.'
+
+print 'Testing standard mapping codecs...',
+
+print '0-127...',
+s = ''.join(map(chr, range(128)))
+for encoding in (
+ 'cp037', 'cp1026',
+ 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
+ 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
+ 'cp863', 'cp865', 'cp866',
+ 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
+ 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
+ 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
+ 'mac_cyrillic', 'mac_latin2',
+
+ 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
+ 'cp1256', 'cp1257', 'cp1258',
+ 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
+
+ 'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
+ 'cp1006', 'cp875', 'iso8859_8',
+
+ ### These have undefined mappings:
+ #'cp424',
+
+ ):
+ try:
+ assert unicode(s,encoding).encode(encoding) == s
+ except AssertionError:
+ print '*** codec "%s" failed round-trip' % encoding
+ except ValueError,why:
+ print '*** codec for "%s" failed: %s' % (encoding, why)
+
+print '128-255...',
+s = ''.join(map(chr, range(128,256)))
+for encoding in (
+ 'cp037', 'cp1026',
+ 'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
+ 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
+ 'cp863', 'cp865', 'cp866',
+ 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
+ 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
+ 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
+ 'mac_cyrillic', 'mac_latin2',
+
+ ### These have undefined mappings:
+ #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
+ #'cp1256', 'cp1257', 'cp1258',
+ #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
+ #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
+
+ ### These fail the round-trip:
+ #'cp1006', 'cp875', 'iso8859_8',
+
+ ):
+ try:
+ assert unicode(s,encoding).encode(encoding) == s
+ except AssertionError:
+ print '*** codec "%s" failed round-trip' % encoding
+ except ValueError,why:
+ print '*** codec for "%s" failed: %s' % (encoding, why)
+
+print 'done.'
+
+print 'Testing Unicode string concatenation...',
+assert (u"abc" u"def") == u"abcdef"
+assert ("abc" u"def") == u"abcdef"
+assert (u"abc" "def") == u"abcdef"
+assert (u"abc" u"def" "ghi") == u"abcdefghi"
+assert ("abc" "def" u"ghi") == u"abcdefghi"
+print 'done.'
--- /dev/null
+# Test the Windows-specific winreg module.
+# The only winreg functions not exercised here are FlushKey, LoadKey and SaveKey.
+
+from winreg import *
+import os, sys
+
+test_key_name = "SOFTWARE\\Python Registry Test Key - Delete Me"
+
+test_data = [
+ ("Int Value", 45, REG_DWORD),
+ ("String Val", "A string value", REG_SZ,),
+ (u"Unicode Val", u"A Unicode value", REG_SZ,),
+ ("StringExpand", "The path is %path%", REG_EXPAND_SZ),
+ ("UnicodeExpand", u"The path is %path%", REG_EXPAND_SZ),
+ ("Multi-string", ["Lots", "of", "string", "values"], REG_MULTI_SZ),
+ ("Multi-unicode", [u"Lots", u"of", u"unicode", u"values"], REG_MULTI_SZ),
+ ("Multi-mixed", [u"Unicode", u"and", "string", "values"],REG_MULTI_SZ),
+ ("Raw Data", ("binary"+chr(0)+"data"), REG_BINARY),
+]
+
+# Create the test key under root_key, give it a default value and one
+# sub-key holding every entry of test_data, check the counts reported by
+# QueryInfoKey(), then verify that both CloseKey() and key.Close() leave
+# the saved integer handle unusable.
+def WriteTestData(root_key):
+ # Set the default value for this key.
+ SetValue(root_key, test_key_name, REG_SZ, "Default value")
+ key = CreateKey(root_key, test_key_name)
+ # Create a sub-key
+ sub_key = CreateKey(key, "sub_key")
+ # Give the sub-key some named values
+
+ for value_name, value_data, value_type in test_data:
+ SetValueEx(sub_key, value_name, 0, value_type, value_data)
+
+ # Check we wrote as many items as we thought.
+ nkeys, nvalues, since_mod = QueryInfoKey(key)
+ assert nkeys==1, "Not the correct number of sub keys"
+ assert nvalues==1, "Not the correct number of values"
+ nkeys, nvalues, since_mod = QueryInfoKey(sub_key)
+ assert nkeys==0, "Not the correct number of sub keys"
+ assert nvalues==len(test_data), "Not the correct number of values"
+ # Close this key this way...
+ # (but before we do, copy the key as an integer - this allows
+ # us to test that the key really gets closed).
+ int_sub_key = int(sub_key)
+ CloseKey(sub_key)
+ # Querying through the saved integer handle is expected to raise
+ # EnvironmentError; reaching the raise below means the key stayed open.
+ try:
+ QueryInfoKey(int_sub_key)
+ raise RuntimeError, "It appears the CloseKey() function does not close the actual key!"
+ except EnvironmentError:
+ pass
+ # ... and close that key that way :-)
+ int_key = int(key)
+ key.Close()
+ # Same check as above, this time for the Close() method of the key object.
+ try:
+ QueryInfoKey(int_key)
+ raise RuntimeError, "It appears the key.Close() function does not close the actual key!"
+ except EnvironmentError:
+ pass
+
+# Read back the default value of the test key, every value stored in its
+# "sub_key" (both by enumeration and by direct lookup) and the sub-key
+# name itself, asserting that each matches what test_data describes.
+def ReadTestData(root_key):
+ # Check we can get default value for this key.
+ val = QueryValue(root_key, test_key_name)
+ assert val=="Default value", "Registry didnt give back the correct value"
+
+ key = OpenKey(root_key, test_key_name)
+ # Read the sub-keys
+ sub_key = OpenKey(key, "sub_key")
+ # Check I can enumerate over the values.
+ # The loop ends when EnumValue() raises EnvironmentError; `index` then
+ # holds the number of values that were enumerated.
+ index = 0
+ while 1:
+ try:
+ data = EnumValue(sub_key, index)
+ except EnvironmentError:
+ break
+ assert data in test_data, "didnt read back the correct test data."
+ index = index + 1
+ assert index==len(test_data), "Didnt read the correct number of items"
+ # Check I can directly access each item
+ for value_name, value_data, value_type in test_data:
+ read_val, read_typ = QueryValueEx(sub_key, value_name)
+ assert read_val==value_data and read_typ == value_type, \
+ "Could not directly read the value"
+ sub_key.Close()
+ # Enumerate our main key.
+ read_val = EnumKey(key, 0)
+ assert read_val == "sub_key", "Read subkey value wrong"
+ # Asking for a second sub-key is expected to raise EnvironmentError.
+ try:
+ EnumKey(key, 1)
+ assert 0, "Was able to get a second key when I only have one!"
+ except EnvironmentError:
+ pass
+
+ key.Close()
+
+# Remove everything the test created under root_key: the values in
+# "sub_key", the sub-key itself, and finally the test key. Also checks
+# that deleting the sub-key twice and re-opening the deleted key both fail.
+def DeleteTestData(root_key):
+ key = OpenKey(root_key, test_key_name, 0, KEY_ALL_ACCESS)
+ sub_key = OpenKey(key, "sub_key", 0, KEY_ALL_ACCESS)
+ # It is not necessary to delete the values before deleting
+ # the key (although subkeys must not exist). We delete them
+ # manually just to prove we can :-)
+ for value_name, value_data, value_type in test_data:
+ DeleteValue(sub_key, value_name)
+
+ nkeys, nvalues, since_mod = QueryInfoKey(sub_key)
+ assert nkeys==0 and nvalues==0, "subkey not empty before delete"
+ sub_key.Close()
+ DeleteKey(key, "sub_key")
+
+ try:
+ # Shouldn't be able to delete it twice!
+ DeleteKey(key, "sub_key")
+ assert 0, "Deleting the key twice succeeded"
+ except EnvironmentError:
+ pass
+ key.Close()
+ DeleteKey(root_key, test_key_name)
+ # Opening should now fail!
+ try:
+ key = OpenKey(root_key, test_key_name)
+ assert 0, "Could open the non-existant key"
+ except WindowsError: # Use this error name this time
+ pass
+
+# Run the write, read and delete phases, in that order, against root_key.
+def TestAll(root_key):
+ WriteTestData(root_key)
+ ReadTestData(root_key)
+ DeleteTestData(root_key)
+
+# Test on my local machine.
+TestAll(HKEY_CURRENT_USER)
+print "Local registry tests worked"
+# Look for an optional "--remote <machine>" pair on the command line.
+# A missing flag (ValueError from index()) or a flag with nothing after
+# it (IndexError) means no remote test was requested.
+try:
+ remote_name = sys.argv[sys.argv.index("--remote")+1]
+except (IndexError, ValueError):
+ remote_name = None
+
+if remote_name is not None:
+ # A failed connection is reported and the remote tests are skipped.
+ try:
+ remote_key = ConnectRegistry(remote_name, HKEY_CURRENT_USER)
+ except EnvironmentError, exc:
+ print "Could not connect to the remote machine -", exc.strerror
+ remote_key = None
+ if remote_key is not None:
+ TestAll(remote_key)
+ print "Remote registry tests worked"
+else:
+ print "Remote registry calls can be tested using",
+ print "'test_winreg.py --remote \\\\machine_name'"
+
--- /dev/null
+import zipfile, os
+
+srcname = "junk9630.tmp"
+zipname = "junk9708.tmp"
+
+# Smoke test for zipfile: write a 1000-line source file, store it in a ZIP
+# archive under two member names, then read both members back. The data
+# read back is not compared with the source; the test only checks that
+# none of the calls raises. Temporary files are removed in all cases.
+try:
+ fp = open(srcname, "w") # Make a source file with some lines
+ for i in range(0, 1000):
+ fp.write("Test of zipfile line %d.\n" % i)
+ fp.close()
+
+ zip = zipfile.ZipFile(zipname, "w") # Create the ZIP archive
+ zip.write(srcname, srcname)
+ zip.write(srcname, "another.name")
+ zip.close()
+
+ zip = zipfile.ZipFile(zipname, "r") # Read the ZIP archive
+ zip.read("another.name")
+ zip.read(srcname)
+ zip.close()
+finally:
+ if os.path.isfile(srcname): # Remove temporary files
+ os.unlink(srcname)
+ if os.path.isfile(zipname):
+ os.unlink(zipname)
+
# in order to provide more variations.
for sync in [zlib.Z_NO_FLUSH, zlib.Z_SYNC_FLUSH, zlib.Z_FULL_FLUSH]:
for level in range(10):
- obj = zlib.compressobj( level )
- d = obj.compress( buf[:3000] )
- d = d + obj.flush( sync )
- d = d + obj.compress( buf[3000:] )
- d = d + obj.flush()
- if zlib.decompress(d) != buf:
- print "Decompress failed: flush mode=%i, level=%i" % (sync,level)
- del obj
+ obj = zlib.compressobj( level )
+ d = obj.compress( buf[:3000] )
+ d = d + obj.flush( sync )
+ d = d + obj.compress( buf[3000:] )
+ d = d + obj.flush()
+ if zlib.decompress(d) != buf:
+ print "Decompress failed: flush mode=%i, level=%i" % (sync,level)
+ del obj
def ignore():
"""An empty function with a big string.
-# threading.py:
-# Proposed new threading module, emulating a subset of Java's threading model
+"""Proposed new threading module, emulating a subset of Java's threading model."""
import sys
import time
class _Semaphore(_Verbose):
- # After Tim Peters' semaphore class, but bnot quite the same (no maximum)
+ # After Tim Peters' semaphore class, but not quite the same (no maximum)
def __init__(self, value=1, verbose=None):
assert value >= 0, "Semaphore initial value must be >= 0"
def __init__(self):
Thread.__init__(self, name=_newname("Dummy-%d"))
- self.__Thread_started = 1
+ self._Thread__started = 1
_active_limbo_lock.acquire()
_active[_get_ident()] = self
_active_limbo_lock.release()
--- /dev/null
+import thread
+# Start an empty thread to initialise the thread mechanics (and the global lock!).
+# This thread finishes immediately, so it has little influence on the test
+# results by itself; it matters only because it initialises the global lock.
+thread.start_new_thread(lambda : 1, ())
+
+import test.pystone
+test.pystone.main()
+
-# Format and print Python stack traces
+"""Extract, format and print information about Python stack traces."""
import linecache
import string
def print_list(extracted_list, file=None):
+ """Print the list of tuples as returned by extract_tb() or
+ extract_stack() as a formatted stack trace to the given file."""
if not file:
file = sys.stderr
for filename, lineno, name, line in extracted_list:
_print(file, ' %s' % string.strip(line))
def format_list(extracted_list):
+ """Given a list of tuples as returned by extract_tb() or
+ extract_stack(), return a list of strings ready for printing.
+ Each string in the resulting list corresponds to the item with
+ the same index in the argument list. Each string ends in a
+ newline; the strings may contain internal newlines as well, for
+ those items whose source text line is not None."""
list = []
for filename, lineno, name, line in extracted_list:
item = ' File "%s", line %d, in %s\n' % (filename,lineno,name)
def print_tb(tb, limit=None, file=None):
+ """Print up to 'limit' stack trace entries from the traceback 'tb'.
+ If 'limit' is omitted or None, all entries are printed. If 'file' is
+ omitted or None, the output goes to sys.stderr; otherwise 'file'
+ should be an open file or file-like object with a write() method."""
if not file:
file = sys.stderr
if limit is None:
n = n+1
def format_tb(tb, limit = None):
+ """A shorthand for 'format_list(extract_tb(tb, limit))'."""
return format_list(extract_tb(tb, limit))
def extract_tb(tb, limit = None):
+ """Return a list of up to 'limit' pre-processed stack trace entries
+ extracted from the traceback object 'tb'. This is useful for
+ alternate formatting of stack traces. If 'limit' is omitted or None,
+ all entries are extracted. A pre-processed stack trace entry is a
+ quadruple (filename, line number, function name, text) representing
+ the information that is usually printed for a stack trace. The text
+ is a string with leading and trailing whitespace stripped; if the
+ source is not available it is None."""
if limit is None:
if hasattr(sys, 'tracebacklimit'):
limit = sys.tracebacklimit
def print_exception(etype, value, tb, limit=None, file=None):
+ """Print exception information and up to 'limit' stack trace entries
+ from the traceback 'tb' to 'file'. This differs from print_tb() in
+ the following ways: (1) if 'tb' is not None, it prints a header
+ "Traceback (most recent call last):"; (2) it prints the exception type and
+ value after the stack trace; (3) if 'etype' is SyntaxError and 'value' has
+ the appropriate format, it prints the line where the syntax error
+ occurred with a caret on the next line indicating the approximate
+ position of the error."""
if not file:
file = sys.stderr
if tb:
- _print(file, 'Traceback (innermost last):')
+ _print(file, 'Traceback (most recent call last):')
print_tb(tb, limit, file)
lines = format_exception_only(etype, value)
for line in lines[:-1]:
_print(file, lines[-1], '')
def format_exception(etype, value, tb, limit = None):
+ """Format a stack trace and the exception information. The arguments
+ have the same meaning as the corresponding arguments to
+ print_exception(). The return value is a list of strings, each
+ ending in a newline and some containing internal newlines. When
+ these lines are concatenated and printed, exactly the same text is
+ printed as by print_exception()."""
if tb:
- list = ['Traceback (innermost last):\n']
+ list = ['Traceback (most recent call last):\n']
list = list + format_tb(tb, limit)
else:
list = []
return list
def format_exception_only(etype, value):
+ """Format the exception part of a traceback. The arguments are the
+ exception type and value such as given by sys.last_type and
+ sys.last_value. The return value is a list of strings, each ending
+ in a newline. Normally, the list contains a single string;
+ however, for SyntaxError exceptions, it contains several lines that
+ (when printed) display detailed information about where the syntax
+ error occurred. The message indicating which exception occurred is
+ always the last string in the list."""
list = []
if type(etype) == types.ClassType:
stype = etype.__name__
def print_exc(limit=None, file=None):
+ """This is a shorthand for 'print_exception(sys.exc_type,
+ sys.exc_value, sys.exc_traceback, limit, file)'.
+ (In fact, it uses sys.exc_info() to retrieve the same information
+ in a thread-safe way.)"""
if not file:
file = sys.stderr
try:
etype = value = tb = None
def print_last(limit=None, file=None):
+ """This is a shorthand for 'print_exception(sys.last_type,
+ sys.last_value, sys.last_traceback, limit, file)'."""
if not file:
file = sys.stderr
print_exception(sys.last_type, sys.last_value, sys.last_traceback,
def print_stack(f=None, limit=None, file=None):
+ """This function prints a stack trace from its invocation point.
+ The optional 'f' argument can be used to specify an alternate stack
+ frame at which to start. The optional 'limit' and 'file' arguments
+ have the same meaning as for print_exception()."""
if f is None:
try:
raise ZeroDivisionError
print_list(extract_stack(f, limit), file)
def format_stack(f=None, limit=None):
+ """A shorthand for 'format_list(extract_stack(f, limit))'."""
if f is None:
try:
raise ZeroDivisionError
return format_list(extract_stack(f, limit))
def extract_stack(f=None, limit = None):
+ """Extract the raw traceback from the current stack frame. The
+ return value has the same format as for extract_tb(). The optional
+ 'f' and 'limit' arguments have the same meaning as for print_stack().
+ Each item in the list is a quadruple (filename, line number,
+ function name, text), and the entries are in order from oldest
+ to newest stack frame."""
if f is None:
try:
raise ZeroDivisionError
list.reverse()
return list
-# Calculate the correct line number of the traceback given in tb (even
-# with -O on).
-# Coded by Marc-Andre Lemburg from the example of PyCode_Addr2Line()
-# in compile.c.
-# Revised version by Jim Hugunin to work with JPython too.
-
def tb_lineno(tb):
+ """Calculate the correct line number of the traceback given in tb
+ (even with -O on)."""
+
+ # Coded by Marc-Andre Lemburg from the example of PyCode_Addr2Line()
+ # in compile.c.
+ # Revised version by Jim Hugunin to work with JPython too.
+
c = tb.tb_frame.f_code
if not hasattr(c, 'co_lnotab'):
return tb.tb_lineno
-# A more or less complete user-defined wrapper around dictionary objects
+"""A more or less complete user-defined wrapper around dictionary objects."""
class UserDict:
def __init__(self, dict=None):
-# A more or less complete user-defined wrapper around list objects
+"""A more or less complete user-defined wrapper around list objects."""
class UserList:
- def __init__(self, list=None):
+ def __init__(self, initlist=None):
self.data = []
- if list is not None:
- if type(list) == type(self.data):
- self.data[:] = list
+ if initlist is not None:
+ # XXX should this accept an arbitrary sequence?
+ if type(initlist) == type(self.data):
+ self.data[:] = initlist
+ elif isinstance(initlist, UserList):
+ self.data[:] = initlist.data[:]
else:
- self.data[:] = list.data[:]
+ self.data = list(initlist)
def __repr__(self): return repr(self.data)
def __cmp__(self, other):
if isinstance(other, UserList):
--- /dev/null
+#!/usr/bin/env python
+## vim:ts=4:et:nowrap
+"""A user-defined wrapper around string objects
+
+Note: string objects have grown methods in Python 1.6.
+This module requires Python 1.6 or later.
+"""
+from types import StringType, UnicodeType
+import sys
+
+class UserString:
+ def __init__(self, seq):
+ if isinstance(seq, StringType) or isinstance(seq, UnicodeType):
+ self.data = seq
+ elif isinstance(seq, UserString):
+ self.data = seq.data[:]
+ else:
+ self.data = str(seq)
+ def __str__(self): return str(self.data)
+ def __repr__(self): return repr(self.data)
+ def __int__(self): return int(self.data)
+ def __long__(self): return long(self.data)
+ def __float__(self): return float(self.data)
+ def __complex__(self): return complex(self.data)
+ def __hash__(self): return hash(self.data)
+
+ def __cmp__(self, string):
+ if isinstance(string, UserString):
+ return cmp(self.data, string.data)
+ else:
+ return cmp(self.data, string)
+ def __contains__(self, char):
+ return char in self.data
+
+ def __len__(self): return len(self.data)
+ def __getitem__(self, index): return self.__class__(self.data[index])
+ def __getslice__(self, start, end):
+ start = max(start, 0); end = max(end, 0)
+ return self.__class__(self.data[start:end])
+
+ def __add__(self, other):
+ if isinstance(other, UserString):
+ return self.__class__(self.data + other.data)
+ elif isinstance(other, StringType) or isinstance(other, UnicodeType):
+ return self.__class__(self.data + other)
+ else:
+ return self.__class__(self.data + str(other))
+ def __radd__(self, other):
+ if isinstance(other, StringType) or isinstance(other, UnicodeType):
+ return self.__class__(other + self.data)
+ else:
+ return self.__class__(str(other) + self.data)
+ def __mul__(self, n):
+ return self.__class__(self.data*n)
+ __rmul__ = __mul__
+
+ # the following methods are defined in alphabetical order:
+ def capitalize(self): return self.__class__(self.data.capitalize())
+ def center(self, width): return self.__class__(self.data.center(width))
+ def count(self, sub, start=0, end=sys.maxint):
+ return self.data.count(sub, start, end)
+ def encode(self, encoding=None, errors=None): # XXX improve this?
+ if encoding:
+ if errors:
+ return self.__class__(self.data.encode(encoding, errors))
+ else:
+ return self.__class__(self.data.encode(encoding))
+ else:
+ return self.__class__(self.data.encode())
+ def endswith(self, suffix, start=0, end=sys.maxint):
+ return self.data.endswith(suffix, start, end)
+ def expandtabs(self, tabsize=8):
+ return self.__class__(self.data.expandtabs(tabsize))
+ def find(self, sub, start=0, end=sys.maxint):
+ return self.data.find(sub, start, end)
+ def index(self, sub, start=0, end=sys.maxint):
+ return self.data.index(sub, start, end)
+ def isdecimal(self): return self.data.isdecimal()
+ def isdigit(self): return self.data.isdigit()
+ def islower(self): return self.data.islower()
+ def isnumeric(self): return self.data.isnumeric()
+ def isspace(self): return self.data.isspace()
+ def istitle(self): return self.data.istitle()
+ def isupper(self): return self.data.isupper()
+ def join(self, seq): return self.data.join(seq)
+ def ljust(self, width): return self.__class__(self.data.ljust(width))
+ def lower(self): return self.__class__(self.data.lower())
+ def lstrip(self): return self.__class__(self.data.lstrip())
+ def replace(self, old, new, maxsplit=-1):
+ return self.__class__(self.data.replace(old, new, maxsplit))
+ def rfind(self, sub, start=0, end=sys.maxint):
+ return self.data.rfind(sub, start, end)
+ def rindex(self, sub, start=0, end=sys.maxint):
+ return self.data.rindex(sub, start, end)
+ def rjust(self, width): return self.__class__(self.data.rjust(width))
+ def rstrip(self): return self.__class__(self.data.rstrip())
+ def split(self, sep=None, maxsplit=-1):
+ return self.data.split(sep, maxsplit)
+ def splitlines(self, keepends=0): return self.data.splitlines(keepends)
+ def startswith(self, prefix, start=0, end=sys.maxint):
+ return self.data.startswith(prefix, start, end)
+ def strip(self): return self.__class__(self.data.strip())
+ def swapcase(self): return self.__class__(self.data.swapcase())
+ def title(self): return self.__class__(self.data.title())
+ def translate(self, table, deletechars=""):
+ return self.__class__(self.data.translate(table, deletechars))
+ def upper(self): return self.__class__(self.data.upper())
+
+class MutableString(UserString):
+ """mutable string objects
+
+ Python strings are immutable objects. This has the advantage that
+ strings may be used as dictionary keys. If this property isn't needed
+ and you insist on changing string values in place instead, you may cheat
+ and use MutableString.
+
+ But the purpose of this class is an educational one: to prevent
+ people from inventing their own mutable string class derived
+ from UserString and then forgetting to remove (override) the
+ __hash__ method inherited from UserString. This would lead to
+ errors that would be very hard to track down.
+
+ A faster and better solution is to rewrite your program using lists."""
+ def __init__(self, string=""):
+ self.data = string
+ def __hash__(self):
+ raise TypeError, "unhashable type (it is mutable)"
+ def __setitem__(self, index, sub):
+ if index < 0 or index >= len(self.data): raise IndexError
+ self.data = self.data[:index] + sub + self.data[index+1:]
+ def __delitem__(self, index):
+ if index < 0 or index >= len(self.data): raise IndexError
+ self.data = self.data[:index] + self.data[index+1:]
+ def __setslice__(self, start, end, sub):
+ start = max(start, 0); end = max(end, 0)
+ if isinstance(sub, UserString):
+ self.data = self.data[:start]+sub.data+self.data[end:]
+ elif isinstance(sub, StringType) or isinstance(sub, UnicodeType):
+ self.data = self.data[:start]+sub+self.data[end:]
+ else:
+ self.data = self.data[:start]+str(sub)+self.data[end:]
+ def __delslice__(self, start, end):
+ start = max(start, 0); end = max(end, 0)
+ self.data = self.data[:start] + self.data[end:]
+ def immutable(self):
+ return UserString(self.data)
+
+if __name__ == "__main__":
+ # execute the regression test to stdout, if called as a script:
+ import os
+ called_in_dir, called_as = os.path.split(sys.argv[0])
+ called_in_dir = os.path.abspath(called_in_dir)
+ called_as, py = os.path.splitext(called_as)
+ sys.path.append(os.path.join(called_in_dir, 'test'))
+ if '-q' in sys.argv:
+ import test_support
+ test_support.verbose = 0
+ __import__('test_' + called_as.lower())