From: Barry Warsaw Date: Thu, 18 Jul 2002 23:09:09 +0000 (+0000) Subject: Anthony Baxter's cleanup patch. Python project SF patch # 583190, X-Git-Tag: v2.3c1~4941 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=7aeac9180e3d6df3d5db89ee7ff5941a81dc5a5d;p=python Anthony Baxter's cleanup patch. Python project SF patch # 583190, quoting: in non-strict mode, messages don't require a blank line at the end with a missing end-terminator. A single newline is sufficient now. Handle trailing whitespace at the end of a boundary. Had to switch from using string.split() to re.split() Handle whitespace on the end of a parameter list for Content-type. Handle whitespace on the end of a plain content-type header. Specifically, get_type(): Strip the content type string. _get_params_preserve(): Strip the parameter names and values on both sides. _parsebody(): Lots of changes as described above, with some stylistic changes by Barry (who hopefully didn't screw things up ;). --- diff --git a/Lib/email/Message.py b/Lib/email/Message.py index 5e8d32fd85..fb121a9212 100644 --- a/Lib/email/Message.py +++ b/Lib/email/Message.py @@ -373,7 +373,7 @@ class Message: value = self.get('content-type', missing) if value is missing: return failobj - return paramre.split(value)[0].lower() + return paramre.split(value)[0].lower().strip() def get_main_type(self, failobj=None): """Return the message's main content type if present.""" @@ -428,11 +428,11 @@ class Message: for p in paramre.split(value): try: name, val = p.split('=', 1) - name = name.rstrip() - val = val.lstrip() + name = name.strip() + val = val.strip() except ValueError: # Must have been a bare attribute - name = p + name = p.strip() val = '' params.append((name, val)) params = Utils.decode_params(params) diff --git a/Lib/email/Parser.py b/Lib/email/Parser.py index 228adbcebe..308110796c 100644 --- a/Lib/email/Parser.py +++ b/Lib/email/Parser.py @@ -124,19 +124,25 @@ class Parser: if boundary: preamble = epilogue = None # 
Split into subparts. The first boundary we're looking for won't - # have the leading newline since we're at the start of the body - # text. + # always have a leading newline since we're at the start of the + # body text, and there's not always a preamble before the first + # boundary. separator = '--' + boundary payload = fp.read() - start = payload.find(separator) - if start < 0: + # We use an RE here because boundaries can have trailing + # whitespace. + mo = re.search( + r'(?P<sep>' + re.escape(separator) + r')(?P<ws>[ \t]*)', + payload) + if not mo: raise Errors.BoundaryError( "Couldn't find starting boundary: %s" % boundary) + start = mo.start() if start > 0: # there's some pre-MIME boundary preamble preamble = payload[0:start] # Find out what kind of line endings we're using - start += len(separator) + start += len(mo.group('sep')) + len(mo.group('ws')) cre = re.compile('\r\n|\r|\n') mo = cre.search(payload, start) if mo: @@ -151,31 +157,32 @@ class Parser: terminator = mo.start() linesep = mo.group('sep') if mo.end() < len(payload): - # there's some post-MIME boundary epilogue + # There's some post-MIME boundary epilogue epilogue = payload[mo.end():] elif self._strict: raise Errors.BoundaryError( "Couldn't find terminating boundary: %s" % boundary) else: - # handle the case of no trailing boundary. I hate mail clients. - # check that it ends in a blank line - endre = re.compile('(?P<sep>\r\n|\r|\n){2}$') - mo = endre.search(payload) + # Handle the case of no trailing boundary. Check that it ends + # in a blank line. Some cases (spamspamspam) don't even have + # that! 
+ mo = re.search('(?P<sep>\r\n|\r|\n){2}$', payload) if not mo: - raise Errors.BoundaryError( - "Couldn't find terminating boundary, and no "+ - "trailing empty line") - else: - linesep = mo.group('sep') - terminator = len(payload) + mo = re.search('(?P<sep>\r\n|\r|\n)$', payload) + if not mo: + raise Errors.BoundaryError( + 'No terminating boundary and no trailing empty line') + linesep = mo.group('sep') + terminator = len(payload) # We split the textual payload on the boundary separator, which # includes the trailing newline. If the container is a # multipart/digest then the subparts are by default message/rfc822 # instead of text/plain. In that case, they'll have a optional # block of MIME headers, then an empty line followed by the # message headers. - separator += linesep - parts = payload[start:terminator].split(linesep + separator) + parts = re.split( + linesep + re.escape(separator) + r'[ \t]*' + linesep, + payload[start:terminator]) for part in parts: if isdigest: if part[0] == linesep: