]> granicus.if.org Git - python/commitdiff
Allow "@" in unquoted attribute values.
author: Fred Drake <fdrake@acm.org>
Tue, 29 Apr 2003 22:12:55 +0000 (22:12 +0000)
committer: Fred Drake <fdrake@acm.org>
Tue, 29 Apr 2003 22:12:55 +0000 (22:12 +0000)
Added test that checks for characters allowed in the query part of URLs.
Backport candidate.

Lib/sgmllib.py
Lib/test/test_sgmllib.py

index 4a8c3b4ca61353222c93f44ea77da010ae2123ff..833e06f98da006f6c802e88667a31d6ce8f1e79b 100644 (file)
@@ -33,7 +33,7 @@ endbracket = re.compile('[<>]')
 tagfind = re.compile('[a-zA-Z][-_.a-zA-Z0-9]*')
 attrfind = re.compile(
     r'\s*([a-zA-Z_][-:.a-zA-Z_0-9]*)(\s*=\s*'
-    r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~\'"]*))?')
+    r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~\'"@]*))?')
 
 
 class SGMLParseError(RuntimeError):
index 6f4454f8f3fb1f3485dc2dfbee0ca2d3d41acabd..6a77e076fabff32d38f63906cfffbd3249a3b2d2 100644 (file)
@@ -200,6 +200,15 @@ DOCTYPE html PUBLIC '-//W3C//DTD HTML 4.01//EN'
         self.check_events("""<a b='' c="">""", [
             ("starttag", "a", [("b", ""), ("c", "")]),
             ])
+        # URL construction stuff from RFC 1808:
+        safe = "$-_.+"
+        extra = "!*'(),"
+        reserved = ";/?:@&="
+        url = "http://example.com:8080/path/to/file?%s%s%s" % (
+            safe, extra, reserved)
+        self.check_events("""<e a=%s>""" % url, [
+            ("starttag", "e", [("a", url)]),
+            ])
         # Regression test for SF patch #669683.
         self.check_events("<e a=rgb(1,2,3)>", [
             ("starttag", "e", [("a", "rgb(1,2,3)")]),