From 248b04383fafdb126794e989a2c023a16eb0f429 Mon Sep 17 00:00:00 2001 From: Fred Drake Date: Mon, 3 Dec 2001 17:09:50 +0000 Subject: [PATCH] Convert to using string methods instead of the string module. In goahead(), use a bound version of rawdata.startswith() since we use the same method all the time and never change the value of rawdata. This can save a lot of bound method creation. --- Lib/HTMLParser.py | 54 ++++++++++++++++++++++------------------------- 1 file changed, 25 insertions(+), 29 deletions(-) diff --git a/Lib/HTMLParser.py b/Lib/HTMLParser.py index 08c53b3ff2..bd6a885fcb 100644 --- a/Lib/HTMLParser.py +++ b/Lib/HTMLParser.py @@ -10,7 +10,6 @@ import markupbase import re -import string # Regular expressions used for parsing @@ -23,7 +22,6 @@ charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]') starttagopen = re.compile('<[a-zA-Z]') piclose = re.compile('>') -endtagopen = re.compile('') tagfind = re.compile('[a-zA-Z][-.a-zA-Z0-9:_]*') attrfind = re.compile( @@ -96,7 +94,6 @@ class HTMLParser(markupbase.ParserBase): def reset(self): """Reset this instance. Loses all unprocessed data.""" self.rawdata = '' - self.stack = [] self.lasttag = '???' self.interesting = interesting_normal markupbase.ParserBase.reset(self) @@ -145,18 +142,19 @@ class HTMLParser(markupbase.ParserBase): if i < j: self.handle_data(rawdata[i:j]) i = self.updatepos(i, j) if i == n: break - if rawdata[i] == '<': + startswith = rawdata.startswith + if startswith('<', i): if starttagopen.match(rawdata, i): # < + letter k = self.parse_starttag(i) - elif endtagopen.match(rawdata, i): # = 0: self.clear_cdata_mode() - elif rawdata.startswith("