Doc strings and reformatting with 4 spaces by Mitch Chapman.

author Guido van Rossum <guido@python.org>

Wed, 10 Dec 1997 16:17:39 +0000 (16:17 +0000)

committer Guido van Rossum <guido@python.org>

Wed, 10 Dec 1997 16:17:39 +0000 (16:17 +0000)
author Guido van Rossum <guido@python.org>
Wed, 10 Dec 1997 16:17:39 +0000 (16:17 +0000)
committer Guido van Rossum <guido@python.org>
Wed, 10 Dec 1997 16:17:39 +0000 (16:17 +0000)
diff --git a/Lib/rfc822.py b/Lib/rfc822.py

index d0e5bcbc9575c1e9e0a265983ddd2ea017b1d2da..89392f6161fe948c29cdbee24c9b9ecc1e95bbad 100644 (file)
--- a/Lib/rfc822.py
+++ b/Lib/rfc822.py
@@ -1,287 +1,308 @@
-# RFC-822 message manipulation class.
-#
-# XXX This is only a very rough sketch of a full RFC-822 parser;
-# in particular the tokenizing of addresses does not adhere to all the
-# quoting rules.
-#
-# Directions for use:
-#
-# To create a Message object: first open a file, e.g.:
-#   fp = open(file, 'r')
-# (or use any other legal way of getting an open file object, e.g. use
-# sys.stdin or call os.popen()).
-# Then pass the open file object to the Message() constructor:
-#   m = Message(fp)
-#
-# To get the text of a particular header there are several methods:
-#   str = m.getheader(name)
-#   str = m.getrawheader(name)
-# where name is the name of the header, e.g. 'Subject'.
-# The difference is that getheader() strips the leading and trailing
-# whitespace, while getrawheader() doesn't.  Both functions retain
-# embedded whitespace (including newlines) exactly as they are
-# specified in the header, and leave the case of the text unchanged.
-#
-# For addresses and address lists there are functions
-#   realname, mailaddress = m.getaddr(name) and
-#   list = m.getaddrlist(name)
-# where the latter returns a list of (realname, mailaddr) tuples.
-#
-# There is also a method
-#   time = m.getdate(name)
-# which parses a Date-like field and returns a time-compatible tuple,
-# i.e. a tuple such as returned by time.localtime() or accepted by
-# time.mktime().
-#
-# See the class definition for lower level access methods.
-#
-# There are also some utility functions here.
-
+"""RFC-822 message manipulation class.
+
+XXX This is only a very rough sketch of a full RFC-822 parser;
+in particular the tokenizing of addresses does not adhere to all the
+quoting rules.
+
+Directions for use:
+
+To create a Message object: first open a file, e.g.:
+  fp = open(file, 'r')
+(or use any other legal way of getting an open file object, e.g. use
+sys.stdin or call os.popen()).
+Then pass the open file object to the Message() constructor:
+  m = Message(fp)
+
+To get the text of a particular header there are several methods:
+  str = m.getheader(name)
+  str = m.getrawheader(name)
+where name is the name of the header, e.g. 'Subject'.
+The difference is that getheader() strips the leading and trailing
+whitespace, while getrawheader() doesn't.  Both functions retain
+embedded whitespace (including newlines) exactly as they are
+specified in the header, and leave the case of the text unchanged.
+
+For addresses and address lists there are functions
+  realname, mailaddress = m.getaddr(name) and
+  list = m.getaddrlist(name)
+where the latter returns a list of (realname, mailaddr) tuples.
+
+There is also a method
+  time = m.getdate(name)
+which parses a Date-like field and returns a time-compatible tuple,
+i.e. a tuple such as returned by time.localtime() or accepted by
+time.mktime().
+
+See the class definition for lower level access methods.
+
+There are also some utility functions here.
+"""
  
  import re
  import string
  import time
  
  
-_blanklines = ('\r\n', '\n')           # Optimization for islast()
+_blanklines = ('\r\n', '\n')            # Optimization for islast()
  
  
  class Message:
-
-       # Initialize the class instance and read the headers.
-       
-       def __init__(self, fp, seekable = 1):
-               self.fp = fp
-               self.seekable = seekable
-               self.startofheaders = None
-               self.startofbody = None
-               #
-               if self.seekable:
-                       try:
-                               self.startofheaders = self.fp.tell()
-                       except IOError:
-                               self.seekable = 0
-               #
-               self.readheaders()
-               #
-               if self.seekable:
-                       try:
-                               self.startofbody = self.fp.tell()
-                       except IOError:
-                               self.seekable = 0
-
-
-       # Rewind the file to the start of the body (if seekable).
-
-       def rewindbody(self):
-               if not self.seekable:
-                       raise IOError, "unseekable file"
-               self.fp.seek(self.startofbody)
-
-
-       # Read header lines up to the entirely blank line that
-       # terminates them.  The (normally blank) line that ends the
-       # headers is skipped, but not included in the returned list.
-       # If a non-header line ends the headers, (which is an error),
-       # an attempt is made to backspace over it; it is never
-       # included in the returned list.
-       #
-       # The variable self.status is set to the empty string if all
-       # went well, otherwise it is an error message.
-       # The variable self.headers is a completely uninterpreted list
-       # of lines contained in the header (so printing them will
-       # reproduce the header exactly as it appears in the file).
-
-       def readheaders(self):
-               self.dict = {}
-               self.unixfrom = ''
-               self.headers = list = []
-               self.status = ''
-               headerseen = ""
-               firstline = 1
-               while 1:
-                       line = self.fp.readline()
-                       if not line:
-                               self.status = 'EOF in headers'
-                               break
-                       # Skip unix From name time lines
-                       if firstline and line[:5] == 'From ':
-                               self.unixfrom = self.unixfrom + line
-                               continue
-                       firstline = 0
-                       if self.islast(line):
-                               break
-                       elif headerseen and line[0] in ' \t':
-                               # It's a continuation line.
-                               list.append(line)
-                               x = (self.dict[headerseen] + "\n " +
-                                    string.strip(line))
-                               self.dict[headerseen] = string.strip(x)
-                       elif ':' in line:
-                               # It's a header line.
-                               list.append(line)
-                               i = string.find(line, ':')
-                               headerseen = string.lower(line[:i])
-                               self.dict[headerseen] = string.strip(
-                                       line[i+1:])
-                       else:
-                               # It's not a header line; stop here.
-                               if not headerseen:
-                                       self.status = 'No headers'
-                               else:
-                                       self.status = 'Bad header'
-                               # Try to undo the read.
-                               if self.seekable:
-                                       self.fp.seek(-len(line), 1)
-                               else:
-                                       self.status = \
-                                               self.status + '; bad seek'
-                               break
-
-
-       # Method to determine whether a line is a legal end of
-       # RFC-822 headers.  You may override this method if your
-       # application wants to bend the rules, e.g. to strip trailing
-       # whitespace, or to recognise MH template separators
-       # ('--------').  For convenience (e.g. for code reading from
-       # sockets) a line consisting of \r\n also matches.
-
-       def islast(self, line):
-               return line in _blanklines
-
-
-       # Look through the list of headers and find all lines matching
-       # a given header name (and their continuation lines).
-       # A list of the lines is returned, without interpretation.
-       # If the header does not occur, an empty list is returned.
-       # If the header occurs multiple times, all occurrences are
-       # returned.  Case is not important in the header name.
-
-       def getallmatchingheaders(self, name):
-               name = string.lower(name) + ':'
-               n = len(name)
-               list = []
-               hit = 0
-               for line in self.headers:
-                       if string.lower(line[:n]) == name:
-                               hit = 1
-                       elif line[:1] not in string.whitespace:
-                               hit = 0
-                       if hit:
-                               list.append(line)
-               return list
-
-
-       # Similar, but return only the first matching header (and its
-       # continuation lines).
-
-       def getfirstmatchingheader(self, name):
-               name = string.lower(name) + ':'
-               n = len(name)
-               list = []
-               hit = 0
-               for line in self.headers:
-                       if hit:
-                               if line[:1] not in string.whitespace:
-                                       break
-                       elif string.lower(line[:n]) == name:
-                               hit = 1
-                       if hit:
-                               list.append(line)
-               return list
-
-
-       # A higher-level interface to getfirstmatchingheader().
-       # Return a string containing the literal text of the header
-       # but with the keyword stripped.  All leading, trailing and
-       # embedded whitespace is kept in the string, however.
-       # Return None if the header does not occur.
-
-       def getrawheader(self, name):
-               list = self.getfirstmatchingheader(name)
-               if not list:
-                       return None
-               list[0] = list[0][len(name) + 1:]
-               return string.joinfields(list, '')
-
-
-       # The normal interface: return a stripped version of the
-       # header value with a name, or None if it doesn't exist.  This
-       # uses the dictionary version which finds the *last* such
-       # header.
-
-       def getheader(self, name):
-               try:
-                       return self.dict[string.lower(name)]
-               except KeyError:
-                       return None
-
-
-       # Retrieve a single address from a header as a tuple, e.g.
-       # ('Guido van Rossum', 'guido@cwi.nl').
-
-       def getaddr(self, name):
-               # New, by Ben Escoto
-               alist = self.getaddrlist(name)
-               if alist:
-                       return alist[0]
-               else:
-                       return (None, None)
-
-       # Retrieve a list of addresses from a header, where each
-       # address is a tuple as returned by getaddr().
-
-       def getaddrlist(self, name):
-               # New, by Ben Escoto
-               try:
-                       data = self[name]
-               except KeyError:
-                       return []
-               a = AddrlistClass(data)
-               return a.getaddrlist()
-
-       # Retrieve a date field from a header as a tuple compatible
-       # with time.mktime().
-
-       def getdate(self, name):
-               try:
-                       data = self[name]
-               except KeyError:
-                       return None
-               return parsedate(data)
-
-       # Retrieve a date field from a header as a 10-tuple.  
-       # The first 9 elements make up a tuple compatible
-       # with time.mktime(), and the 10th is the offset
-       # of the poster's time zone from GMT/UTC.
-
-       def getdate_tz(self, name):
-               try:
-                       data = self[name]
-               except KeyError:
-                       return None
-               return parsedate_tz(data)
-
-
-       # Access as a dictionary (only finds *last* header of each type):
-
-       def __len__(self):
-               return len(self.dict)
-
-       def __getitem__(self, name):
-               return self.dict[string.lower(name)]
-
-       def has_key(self, name):
-               return self.dict.has_key(string.lower(name))
-
-       def keys(self):
-               return self.dict.keys()
-
-       def values(self):
-               return self.dict.values()
-
-       def items(self):
-               return self.dict.items()
+    """Represents a single RFC-822-compliant message."""
+    
+    def __init__(self, fp, seekable = 1):
+        """Initialize the class instance and read the headers."""
+        self.fp = fp
+        self.seekable = seekable
+        self.startofheaders = None
+        self.startofbody = None
+        #
+        if self.seekable:
+            try:
+                self.startofheaders = self.fp.tell()
+            except IOError:
+                self.seekable = 0
+        #
+        self.readheaders()
+        #
+        if self.seekable:
+            try:
+                self.startofbody = self.fp.tell()
+            except IOError:
+                self.seekable = 0
+    
+    def rewindbody(self):
+        """Rewind the file to the start of the body (if seekable)."""
+        if not self.seekable:
+            raise IOError, "unseekable file"
+        self.fp.seek(self.startofbody)
+    
+    def readheaders(self):
+        """Read header lines.
+        
+        Read header lines up to the entirely blank line that
+        terminates them.  The (normally blank) line that ends the
+        headers is skipped, but not included in the returned list.
+        If a non-header line ends the headers (which is an error),
+        an attempt is made to backspace over it; it is never
+        included in the returned list.
+        
+        The variable self.status is set to the empty string if all
+        went well, otherwise it is an error message.
+        The variable self.headers is a completely uninterpreted list
+        of lines contained in the header (so printing them will
+        reproduce the header exactly as it appears in the file).
+        """
+        self.dict = {}
+        self.unixfrom = ''
+        self.headers = list = []
+        self.status = ''
+        headerseen = ""
+        firstline = 1
+        while 1:
+            line = self.fp.readline()
+            if not line:
+                self.status = 'EOF in headers'
+                break
+            # Skip unix From name time lines
+            if firstline and line[:5] == 'From ':
+                self.unixfrom = self.unixfrom + line
+                continue
+            firstline = 0
+            if self.islast(line):
+                break
+            elif headerseen and line[0] in ' \t':
+                # It's a continuation line.
+                list.append(line)
+                x = (self.dict[headerseen] + "\n " +
+                string.strip(line))
+                self.dict[headerseen] = string.strip(x)
+            elif ':' in line:
+                # It's a header line.
+                list.append(line)
+                i = string.find(line, ':')
+                headerseen = string.lower(line[:i])
+                self.dict[headerseen] = string.strip(
+                line[i+1:])
+            else:
+                # It's not a header line; stop here.
+                if not headerseen:
+                    self.status = 'No headers'
+                else:
+                    self.status = 'Bad header'
+                # Try to undo the read.
+                if self.seekable:
+                    self.fp.seek(-len(line), 1)
+                else:
+                    self.status = \
+                                                self.status + '; bad seek'
+                break
+    
+    def islast(self, line):
+        """Determine whether a line is a legal end of RFC-822 headers.
+        
+        You may override this method if your application wants
+        to bend the rules, e.g. to strip trailing whitespace,
+        or to recognise MH template separators ('--------').
+        For convenience (e.g. for code reading from sockets) a
+        line consisting of \\r\\n also matches.
+        """
+        return line in _blanklines
+    
+    def getallmatchingheaders(self, name):
+        """Find all header lines matching a given header name.
+        
+        Look through the list of headers and find all lines
+        matching a given header name (and their continuation
+        lines).  A list of the lines is returned, without
+        interpretation.  If the header does not occur, an
+        empty list is returned.  If the header occurs multiple
+        times, all occurrences are returned.  Case is not
+        important in the header name.
+        """
+        name = string.lower(name) + ':'
+        n = len(name)
+        list = []
+        hit = 0
+        for line in self.headers:
+            if string.lower(line[:n]) == name:
+                hit = 1
+            elif line[:1] not in string.whitespace:
+                hit = 0
+            if hit:
+                list.append(line)
+        return list
+    
+    def getfirstmatchingheader(self, name):
+        """Get the first header line matching name.
+        
+        This is similar to getallmatchingheaders, but it returns
+        only the first matching header (and its continuation
+        lines).
+        """
+        name = string.lower(name) + ':'
+        n = len(name)
+        list = []
+        hit = 0
+        for line in self.headers:
+            if hit:
+                if line[:1] not in string.whitespace:
+                    break
+            elif string.lower(line[:n]) == name:
+                hit = 1
+            if hit:
+                list.append(line)
+        return list
+    
+    def getrawheader(self, name):
+        """A higher-level interface to getfirstmatchingheader().
+        
+        Return a string containing the literal text of the
+        header but with the keyword stripped.  All leading,
+        trailing and embedded whitespace is kept in the
+        string, however.
+        Return None if the header does not occur.
+        """
+        
+        list = self.getfirstmatchingheader(name)
+        if not list:
+            return None
+        list[0] = list[0][len(name) + 1:]
+        return string.joinfields(list, '')
+    
+    def getheader(self, name):
+        """Get the header value for a name.
+        
+        This is the normal interface: it returns a stripped
+        version of the header value for a given header name,
+        or None if it doesn't exist.  This uses the dictionary
+        version which finds the *last* such header.
+        """
+        try:
+            return self.dict[string.lower(name)]
+        except KeyError:
+            return None
+    
+    def getaddr(self, name):
+        """Get a single address from a header, as a tuple.
+        
+        An example return value:
+        ('Guido van Rossum', 'guido@cwi.nl')
+        """
+        # New, by Ben Escoto
+        alist = self.getaddrlist(name)
+        if alist:
+            return alist[0]
+        else:
+            return (None, None)
+    
+    def getaddrlist(self, name):
+        """Get a list of addresses from a header.
+        
+        Retrieves a list of addresses from a header, where each
+        address is a tuple as returned by getaddr().
+        """
+        # New, by Ben Escoto
+        try:
+            data = self[name]
+        except KeyError:
+            return []
+        a = AddrlistClass(data)
+        return a.getaddrlist()
+    
+    def getdate(self, name):
+        """Retrieve a date field from a header.
+        
+        Retrieves a date field from the named header, returning
+        a tuple compatible with time.mktime().
+        """
+        try:
+            data = self[name]
+        except KeyError:
+            return None
+        return parsedate(data)
+    
+    def getdate_tz(self, name):
+        """Retrieve a date field from a header as a 10-tuple.
+        
+        The first 9 elements make up a tuple compatible with
+        time.mktime(), and the 10th is the offset of the poster's
+        time zone from GMT/UTC.
+        """
+        try:
+            data = self[name]
+        except KeyError:
+            return None
+        return parsedate_tz(data)
+    
+    
+    # Access as a dictionary (only finds *last* header of each type):
+    
+    def __len__(self):
+        """Get the number of headers in a message."""
+        return len(self.dict)
+    
+    def __getitem__(self, name):
+        """Get a specific header, as from a dictionary."""
+        return self.dict[string.lower(name)]
+    
+    def has_key(self, name):
+        """Determine whether a message contains the named header."""
+        return self.dict.has_key(string.lower(name))
+    
+    def keys(self):
+        """Get all of a message's header field names."""
+        return self.dict.keys()
+    
+    def values(self):
+        """Get all of a message's header field values."""
+        return self.dict.values()
+    
+    def items(self):
+        """Get all of a message's headers.
+        
+        Returns a list of name, value tuples.
+        """
+        return self.dict.items()
  
  
  
@@ -292,272 +313,294 @@ class Message:
  # XXX The inverses of the parse functions may also be useful.
  
  
-# Remove quotes from a string.
-
  def unquote(str):
-       if len(str) > 1:
-               if str[0] == '"' and str[-1:] == '"':
-                       return str[1:-1]
-               if str[0] == '<' and str[-1:] == '>':
-                       return str[1:-1]
-       return str
-
+    """Remove quotes from a string."""
+    if len(str) > 1:
+        if str[0] == '"' and str[-1:] == '"':
+            return str[1:-1]
+        if str[0] == '<' and str[-1:] == '>':
+            return str[1:-1]
+    return str
  
-# Add quotes around a string.
  
  def quote(str):
-       return '"%s"' % string.join(
-           string.split(
-               string.join(
-                   string.split(str, '\\'),
-                   '\\\\'),
-               '"'),
-           '\\"')
+    """Add quotes around a string."""
+    return '"%s"' % string.join(
+    string.split(
+    string.join(
+    string.split(str, '\\'),
+    '\\\\'),
+    '"'),
+    '\\"')
  
  
-# External interface to parse an address
-
  def parseaddr(address):
+    """Parse an address into a (realname, mailaddr) tuple."""
      a = AddrlistClass(address)
      list = a.getaddrlist()
      if not list:
-       return (None, None)
+        return (None, None)
      else:
-       return list[0]
-
+        return list[0]
  
-# Address parser class by Ben Escoto
  
  class AddrlistClass:
-
+    """Address parser class by Ben Escoto.
+    
+    To understand what this class does, it helps to have a copy of
+    RFC-822 in front of you.
+    """
+    
      def __init__(self, field):
-
-       self.specials = '()<>@,:;.\"[]'
-       self.pos = 0
-       self.LWS = ' \t'
-       self.CR = '\r'
-       self.atomends = self.specials + self.LWS + self.CR
-
-       self.field = field
-       self.commentlist = []
-
-
+        """Initialize a new instance.
+        
+        `field' is an unparsed address header field, containing
+        one or more addresses.
+        """
+        self.specials = '()<>@,:;.\"[]'
+        self.pos = 0
+        self.LWS = ' \t'
+        self.CR = '\r'
+        self.atomends = self.specials + self.LWS + self.CR
+        
+        self.field = field
+        self.commentlist = []
+    
      def gotonext(self):
-
-       while self.pos < len(self.field):
-           if self.field[self.pos] in self.LWS + '\n\r':
-               self.pos = self.pos + 1
-           elif self.field[self.pos] == '(':
-               self.commentlist.append(self.getcomment())
-           else: break
-
+        """Parse up to the start of the next address."""
+        while self.pos < len(self.field):
+            if self.field[self.pos] in self.LWS + '\n\r':
+                self.pos = self.pos + 1
+            elif self.field[self.pos] == '(':
+                self.commentlist.append(self.getcomment())
+            else: break
+    
      def getaddrlist(self):
-
-       ad = self.getaddress()
-       if ad:
-           return ad + self.getaddrlist()
-       else: return []
-
+        """Parse all addresses.
+        
+        Returns a list containing all of the addresses.
+        """
+        ad = self.getaddress()
+        if ad:
+            return ad + self.getaddrlist()
+        else: return []
+    
      def getaddress(self):
-       self.commentlist = []
-       self.gotonext()
-
-       oldpos = self.pos
-       oldcl = self.commentlist
-       plist = self.getphraselist()
-
-       self.gotonext()
-       returnlist = []
-
-       if self.pos >= len(self.field):
-           # Bad email address technically, no domain.
-           if plist:
-               returnlist = [(string.join(self.commentlist), plist[0])]
-
-       elif self.field[self.pos] in '.@':
-           # email address is just an addrspec
-           # this isn't very efficient since we start over
-           self.pos = oldpos
-           self.commentlist = oldcl
-           addrspec = self.getaddrspec()
-           returnlist = [(string.join(self.commentlist), addrspec)]
-
-       elif self.field[self.pos] == ':':
-           # address is a group
-           returnlist = []
-
-           self.pos = self.pos + 1
-           while self.pos < len(self.field):
-               self.gotonext()
-               if self.field[self.pos] == ';':
-                   self.pos = self.pos + 1
-                   break
-               returnlist = returnlist + self.getaddress()
-
-       elif self.field[self.pos] == '<':
-           # Address is a phrase then a route addr
-           routeaddr = self.getrouteaddr()
-           
-           if self.commentlist:
-               returnlist = [(string.join(plist) + ' (' + \
-                        string.join(self.commentlist) + ')', routeaddr)]
-           else: returnlist = [(string.join(plist), routeaddr)]
-
-       else:
-           if plist:
-               returnlist = [(string.join(self.commentlist), plist[0])]
-
-       self.gotonext()
-       if self.pos < len(self.field) and self.field[self.pos] == ',':
-           self.pos = self.pos + 1
-       return returnlist
-
-
+        """Parse the next address."""
+        self.commentlist = []
+        self.gotonext()
+        
+        oldpos = self.pos
+        oldcl = self.commentlist
+        plist = self.getphraselist()
+        
+        self.gotonext()
+        returnlist = []
+        
+        if self.pos >= len(self.field):
+            # Bad email address technically, no domain.
+            if plist:
+                returnlist = [(string.join(self.commentlist), plist[0])]
+            
+        elif self.field[self.pos] in '.@':
+            # email address is just an addrspec
+            # this isn't very efficient since we start over
+            self.pos = oldpos
+            self.commentlist = oldcl
+            addrspec = self.getaddrspec()
+            returnlist = [(string.join(self.commentlist), addrspec)]
+            
+        elif self.field[self.pos] == ':':
+            # address is a group
+            returnlist = []
+            
+            self.pos = self.pos + 1
+            while self.pos < len(self.field):
+                self.gotonext()
+                if self.field[self.pos] == ';':
+                    self.pos = self.pos + 1
+                    break
+                returnlist = returnlist + self.getaddress()
+            
+        elif self.field[self.pos] == '<':
+            # Address is a phrase then a route addr
+            routeaddr = self.getrouteaddr()
+            
+            if self.commentlist:
+                returnlist = [(string.join(plist) + ' (' + \
+                         string.join(self.commentlist) + ')', routeaddr)]
+            else: returnlist = [(string.join(plist), routeaddr)]
+            
+        else:
+            if plist:
+                returnlist = [(string.join(self.commentlist), plist[0])]
+        
+        self.gotonext()
+        if self.pos < len(self.field) and self.field[self.pos] == ',':
+            self.pos = self.pos + 1
+        return returnlist
+    
      def getrouteaddr(self):
-       # This just skips all the route stuff and returns the addrspec
-       if self.field[self.pos] != '<':
-           return
-
-       expectroute = 0
-       self.pos = self.pos + 1
-       self.gotonext()
-       while self.pos < len(self.field):
-           if expectroute:
-               self.getdomain()
-               expectroute = 0
-           elif self.field[self.pos] == '>':
-               self.pos = self.pos + 1
-               break
-           elif self.field[self.pos] == '@':
-               self.pos = self.pos + 1
-               expectroute = 1
-           elif self.field[self.pos] == ':':
-               self.pos = self.pos + 1
-               expectaddrspec = 1
-           else:
-               adlist = self.getaddrspec()
-               self.pos = self.pos + 1
-               break
-           self.gotonext()
-
-       return adlist
-
-
+        """Parse a route address (Return-path value).
+        
+        This method just skips all the route stuff and returns the addrspec.
+        """
+        if self.field[self.pos] != '<':
+            return
+        
+        expectroute = 0
+        self.pos = self.pos + 1
+        self.gotonext()
+        while self.pos < len(self.field):
+            if expectroute:
+                self.getdomain()
+                expectroute = 0
+            elif self.field[self.pos] == '>':
+                self.pos = self.pos + 1
+                break
+            elif self.field[self.pos] == '@':
+                self.pos = self.pos + 1
+                expectroute = 1
+            elif self.field[self.pos] == ':':
+                self.pos = self.pos + 1
+                expectaddrspec = 1
+            else:
+                adlist = self.getaddrspec()
+                self.pos = self.pos + 1
+                break
+            self.gotonext()
+        
+        return adlist
+    
      def getaddrspec(self):
-
-       aslist = []
-
-       self.gotonext()
-       while self.pos < len(self.field):
-           if self.field[self.pos] == '.':
-               aslist.append('.')
-               self.pos = self.pos + 1
-           elif self.field[self.pos] == '"':
-               aslist.append(self.getquote())
-           elif self.field[self.pos] in self.atomends:
-               break
-           else: aslist.append(self.getatom())
-           self.gotonext()
-
-       if self.pos >= len(self.field) or self.field[self.pos] != '@':
-           return string.join(aslist, '')
-
-       aslist.append('@')
-       self.pos = self.pos + 1
-       self.gotonext()
-       return string.join(aslist, '') + self.getdomain()
-
-
+        """Parse an RFC-822 addr-spec."""
+        aslist = []
+        
+        self.gotonext()
+        while self.pos < len(self.field):
+            if self.field[self.pos] == '.':
+                aslist.append('.')
+                self.pos = self.pos + 1
+            elif self.field[self.pos] == '"':
+                aslist.append(self.getquote())
+            elif self.field[self.pos] in self.atomends:
+                break
+            else: aslist.append(self.getatom())
+            self.gotonext()
+        
+        if self.pos >= len(self.field) or self.field[self.pos] != '@':
+            return string.join(aslist, '')
+        
+        aslist.append('@')
+        self.pos = self.pos + 1
+        self.gotonext()
+        return string.join(aslist, '') + self.getdomain()
+    
      def getdomain(self):
-
-       sdlist = []
-       while self.pos < len(self.field):
-           if self.field[self.pos] in self.LWS:
-               self.pos = self.pos + 1
-           elif self.field[self.pos] == '(':
-               self.commentlist.append(self.getcomment())
-           elif self.field[self.pos] == '[':
-               sdlist.append(self.getdomainliteral())
-           elif self.field[self.pos] == '.':
-               self.pos = self.pos + 1
-               sdlist.append('.')
-           elif self.field[self.pos] in self.atomends:
-               break
-           else: sdlist.append(self.getatom())
-
-       return string.join(sdlist, '')
-
-
+        """Get the complete domain name from an address."""
+        sdlist = []
+        while self.pos < len(self.field):
+            if self.field[self.pos] in self.LWS:
+                self.pos = self.pos + 1
+            elif self.field[self.pos] == '(':
+                self.commentlist.append(self.getcomment())
+            elif self.field[self.pos] == '[':
+                sdlist.append(self.getdomainliteral())
+            elif self.field[self.pos] == '.':
+                self.pos = self.pos + 1
+                sdlist.append('.')
+            elif self.field[self.pos] in self.atomends:
+                break
+            else: sdlist.append(self.getatom())
+        
+        return string.join(sdlist, '')
+    
      def getdelimited(self, beginchar, endchars, allowcomments = 1):
-
-       if self.field[self.pos] != beginchar:
-           return ''
-
-       slist = ['']
-       quote = 0
-       self.pos = self.pos + 1
-       while self.pos < len(self.field):
-           if quote == 1:
-               slist.append(self.field[self.pos])
-               quote = 0
-           elif self.field[self.pos] in endchars:
-               self.pos = self.pos + 1
-               break
-           elif allowcomments and self.field[self.pos] == '(':
-               slist.append(self.getcomment())
-           elif self.field[self.pos] == '\\':
-               quote = 1
-           else:
-               slist.append(self.field[self.pos])
-           self.pos = self.pos + 1
-
-       return string.join(slist, '')
-
+        """Parse a header fragment delimited by special characters.
+        
+        `beginchar' is the start character for the fragment.
+        If self is not looking at an instance of `beginchar' then
+        getdelimited returns the empty string.
+        
+        `endchars' is a sequence of allowable end-delimiting characters.
+        Parsing stops when one of these is encountered.
+        
+        If `allowcomments' is non-zero, embedded RFC-822 comments
+        are allowed within the parsed fragment.
+        """
+        if self.field[self.pos] != beginchar:
+            return ''
+        
+        slist = ['']
+        quote = 0
+        self.pos = self.pos + 1
+        while self.pos < len(self.field):
+            if quote == 1:
+                slist.append(self.field[self.pos])
+                quote = 0
+            elif self.field[self.pos] in endchars:
+                self.pos = self.pos + 1
+                break
+            elif allowcomments and self.field[self.pos] == '(':
+                slist.append(self.getcomment())
+            elif self.field[self.pos] == '\\':
+                quote = 1
+            else:
+                slist.append(self.field[self.pos])
+            self.pos = self.pos + 1
+        
+        return string.join(slist, '')
+    
      def getquote(self):
-       return self.getdelimited('"', '"\r', 0)
-
+        """Get a quote-delimited fragment from self's field."""
+        return self.getdelimited('"', '"\r', 0)
+    
      def getcomment(self):
-       return self.getdelimited('(', ')\r', 1)
-
+        """Get a parenthesis-delimited fragment from self's field."""
+        return self.getdelimited('(', ')\r', 1)
+    
      def getdomainliteral(self):
-       return self.getdelimited('[', ']\r', 0)
-
-
+        """Parse an RFC-822 domain-literal."""
+        return self.getdelimited('[', ']\r', 0)
+    
      def getatom(self):
-
-       atomlist = ['']
-
-       while self.pos < len(self.field):
-           if self.field[self.pos] in self.atomends:
-               break
-           else: atomlist.append(self.field[self.pos])
-           self.pos = self.pos + 1
-
-       return string.join(atomlist, '')
-
-
+        """Parse an RFC-822 atom."""
+        atomlist = ['']
+        
+        while self.pos < len(self.field):
+            if self.field[self.pos] in self.atomends:
+                break
+            else: atomlist.append(self.field[self.pos])
+            self.pos = self.pos + 1
+        
+        return string.join(atomlist, '')
+    
      def getphraselist(self):
-
-       plist = []
-
-       while self.pos < len(self.field):
-           if self.field[self.pos] in self.LWS:
-               self.pos = self.pos + 1
-           elif self.field[self.pos] == '"':
-               plist.append(self.getquote())
-           elif self.field[self.pos] == '(':
-               self.commentlist.append(self.getcomment())
-           elif self.field[self.pos] in self.atomends:
-               break
-           else: plist.append(self.getatom())
-
-       return plist
+        """Parse a sequence of RFC-822 phrases.
+        
+        A phrase is a sequence of words, which are in turn either
+        RFC-822 atoms or quoted-strings.
+        """
+        plist = []
+        
+        while self.pos < len(self.field):
+            if self.field[self.pos] in self.LWS:
+                self.pos = self.pos + 1
+            elif self.field[self.pos] == '"':
+                plist.append(self.getquote())
+            elif self.field[self.pos] == '(':
+                self.commentlist.append(self.getcomment())
+            elif self.field[self.pos] in self.atomends:
+                break
+            else: plist.append(self.getatom())
+        
+        return plist
  
  
  # Parse a date field
  
  _monthnames = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul',
-         'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
+              'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
  _daynames = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
  
  # The timezone table does not include the military time zones defined
@@ -572,116 +615,125 @@ _timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
              'CST': -600, 'CDT':-500,   # Central
               'MST':-700, 'MDT':-600,    # Mountain
               'PST':-800, 'PDT':-700     # Pacific
-            }    
+             }    
+
  
  def parsedate_tz(data):
-       data = string.split(data)
-       if data[0][-1] == ',' or data[0] in _daynames:
-               # There's a dayname here. Skip it
-               del data[0]
-       if len(data) == 3: # RFC 850 date, deprecated
-               stuff = string.split(data[0], '-')
-               if len(stuff) == 3:
-                       data = stuff + data[1:]
-       if len(data) == 4:
-               s = data[3]
-               i = string.find(s, '+')
-               if i > 0:
-                       data[3:] = [s[:i], s[i+1:]]
-               else:
-                       data.append('') # Dummy tz
-       if len(data) < 5:
-               return None
-       data = data[:5]
-       [dd, mm, yy, tm, tz] = data
-       if not mm in _monthnames:
-               dd, mm, yy, tm, tz = mm, dd, tm, yy, tz
-               if not mm in _monthnames:
-                       return None
-       mm = _monthnames.index(mm)+1
-       tm = string.splitfields(tm, ':')
-       if len(tm) == 2:
-               [thh, tmm] = tm
-               tss = '0'
-       else:
-               [thh, tmm, tss] = tm
-       try:
-               yy = string.atoi(yy)
-               dd = string.atoi(dd)
-               thh = string.atoi(thh)
-               tmm = string.atoi(tmm)
-               tss = string.atoi(tss)
-       except string.atoi_error:
-               return None
-       tzoffset=0
-       tz=string.upper(tz)
-       if _timezones.has_key(tz):
-               tzoffset=_timezones[tz]
-       else:
-               try: 
-                       tzoffset=string.atoi(tz)
-               except string.atoi_error: 
-                       pass
-       # Convert a timezone offset into seconds ; -0500 -> -18000
-       if tzoffset<0: tzsign=-1
-       else: tzsign=1
-       tzoffset=tzoffset*tzsign
-       tzoffset = tzsign * ( (tzoffset/100)*3600 + (tzoffset % 100)*60)
-       tuple = (yy, mm, dd, thh, tmm, tss, 0, 0, 0, tzoffset)
-       return tuple
+    """Convert a date string to a time tuple.
+    
+    Returns a 10-tuple ending with the zone offset in seconds, or None.
+    """
+    data = string.split(data)
+    if data[0][-1] == ',' or data[0] in _daynames:
+        # There's a dayname here. Skip it
+        del data[0]
+    if len(data) == 3: # RFC 850 date, deprecated
+        stuff = string.split(data[0], '-')
+        if len(stuff) == 3:
+            data = stuff + data[1:]
+    if len(data) == 4:
+        s = data[3]
+        i = string.find(s, '+')
+        if i > 0:
+            data[3:] = [s[:i], s[i+1:]]
+        else:
+            data.append('') # Dummy tz
+    if len(data) < 5:
+        return None
+    data = data[:5]
+    [dd, mm, yy, tm, tz] = data
+    if not mm in _monthnames:
+        dd, mm, yy, tm, tz = mm, dd, tm, yy, tz
+        if not mm in _monthnames:
+            return None
+    mm = _monthnames.index(mm)+1
+    tm = string.splitfields(tm, ':')
+    if len(tm) == 2:
+        [thh, tmm] = tm
+        tss = '0'
+    else:
+        [thh, tmm, tss] = tm
+    try:
+        yy = string.atoi(yy)
+        dd = string.atoi(dd)
+        thh = string.atoi(thh)
+        tmm = string.atoi(tmm)
+        tss = string.atoi(tss)
+    except string.atoi_error:
+        return None
+    tzoffset=0
+    tz=string.upper(tz)
+    if _timezones.has_key(tz):
+        tzoffset=_timezones[tz]
+    else:
+        try: 
+            tzoffset=string.atoi(tz)
+        except string.atoi_error: 
+            pass
+    # Convert a timezone offset into seconds ; -0500 -> -18000
+    if tzoffset<0: tzsign=-1
+    else: tzsign=1
+    tzoffset=tzoffset*tzsign
+    tzoffset = tzsign * ( (tzoffset/100)*3600 + (tzoffset % 100)*60)
+    tuple = (yy, mm, dd, thh, tmm, tss, 0, 0, 0, tzoffset)
+    return tuple
+
  
  def parsedate(data):
-       t=parsedate_tz(data)
-       if type(t)==type( () ):
-               return t[:9]
-       else: return t    
+    """Convert a time string to a time tuple."""
+    t=parsedate_tz(data)
+    if type(t)==type( () ):
+        return t[:9]
+    else: return t    
  
-def mktime_tz(data):
-       """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.
  
-       Minor glitch: this first interprets the first 8 elements as a
-       local time and then compensates for the timezone difference;
-       this may yield a slight error around daylight savings time
-       switch dates.  Not enough to worry about for common use.
+def mktime_tz(data):
+    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.
+    
+    Minor glitch: this first interprets the first 8 elements as a
+    local time and then compensates for the timezone difference;
+    this may yield a slight error around daylight savings time
+    switch dates.  Not enough to worry about for common use.
+    
+    """
+    t = time.mktime(data[:8] + (0,))
+    return t + data[9] - time.timezone
  
-       """
-       t = time.mktime(data[:8] + (0,))
-       return t + data[9] - time.timezone
  
  # When used as script, run a small test program.
  # The first command line argument must be a filename containing one
  # message in RFC-822 format.
  
  if __name__ == '__main__':
-       import sys, os
-       file = os.path.join(os.environ['HOME'], 'Mail/inbox/1')
-       if sys.argv[1:]: file = sys.argv[1]
-       f = open(file, 'r')
-       m = Message(f)
-       print 'From:', m.getaddr('from')
-       print 'To:', m.getaddrlist('to')
-       print 'Subject:', m.getheader('subject')
-       print 'Date:', m.getheader('date')
-       date = m.getdate_tz('date')
-       if date:
-               print 'ParsedDate:', time.asctime(date[:-1]),
-               hhmmss = date[-1]
-               hhmm, ss = divmod(hhmmss, 60)
-               hh, mm = divmod(hhmm, 60)
-               print "%+03d%02d" % (hh, mm),
-               if ss: print ".%02d" % ss,
-               print
-       else:
-               print 'ParsedDate:', None
-       m.rewindbody()
-       n = 0
-       while f.readline():
-               n = n + 1
-       print 'Lines:', n
-       print '-'*70
-       print 'len =', len(m)
-       if m.has_key('Date'): print 'Date =', m['Date']
-       if m.has_key('X-Nonsense'): pass
-       print 'keys =', m.keys()
-       print 'values =', m.values()
-       print 'items =', m.items()
+    import sys, os
+    file = os.path.join(os.environ['HOME'], 'Mail/inbox/1')
+    if sys.argv[1:]: file = sys.argv[1]
+    f = open(file, 'r')
+    m = Message(f)
+    print 'From:', m.getaddr('from')
+    print 'To:', m.getaddrlist('to')
+    print 'Subject:', m.getheader('subject')
+    print 'Date:', m.getheader('date')
+    date = m.getdate_tz('date')
+    if date:
+        print 'ParsedDate:', time.asctime(date[:-1]),
+        hhmmss = date[-1]
+        hhmm, ss = divmod(hhmmss, 60)
+        hh, mm = divmod(hhmm, 60)
+        print "%+03d%02d" % (hh, mm),
+        if ss: print ".%02d" % ss,
+        print
+    else:
+        print 'ParsedDate:', None
+    m.rewindbody()
+    n = 0
+    while f.readline():
+        n = n + 1
+    print 'Lines:', n
+    print '-'*70
+    print 'len =', len(m)
+    if m.has_key('Date'): print 'Date =', m['Date']
+    if m.has_key('X-Nonsense'): pass
+    print 'keys =', m.keys()
+    print 'values =', m.values()
+    print 'items =', m.items()
author	Guido van Rossum <guido@python.org>
	Wed, 10 Dec 1997 16:17:39 +0000 (16:17 +0000)
committer	Guido van Rossum <guido@python.org>
	Wed, 10 Dec 1997 16:17:39 +0000 (16:17 +0000)