Issue #13748: Raw bytes literals can now be written with the `rb` prefix as well as `br`.

author Antoine Pitrou <solipsis@pitrou.net>

Thu, 12 Jan 2012 21:46:19 +0000 (22:46 +0100)

committer Antoine Pitrou <solipsis@pitrou.net>

Thu, 12 Jan 2012 21:46:19 +0000 (22:46 +0100)
author Antoine Pitrou <solipsis@pitrou.net>
Thu, 12 Jan 2012 21:46:19 +0000 (22:46 +0100)
committer Antoine Pitrou <solipsis@pitrou.net>
Thu, 12 Jan 2012 21:46:19 +0000 (22:46 +0100)
diff --git a/Doc/reference/lexical_analysis.rst b/Doc/reference/lexical_analysis.rst

index 5900daa3e2d1da6f6630e0a488961498bf4cb730..c20c47e33dc17b7fcf256dd68fba425b3b928fd6 100644 (file)
--- a/Doc/reference/lexical_analysis.rst
+++ b/Doc/reference/lexical_analysis.rst
@@ -412,7 +412,7 @@ String literals are described by the following lexical definitions:
  
  .. productionlist::
     bytesliteral: `bytesprefix`(`shortbytes` | `longbytes`)
-   bytesprefix: "b" | "B" | "br" | "Br" | "bR" | "BR"
+   bytesprefix: "b" | "B" | "br" | "Br" | "bR" | "BR" | "rb" | "rB" | "Rb" | "RB"
     shortbytes: "'" `shortbytesitem`* "'" | '"' `shortbytesitem`* '"'
     longbytes: "'''" `longbytesitem`* "'''" | '"""' `longbytesitem`* '"""'
     shortbytesitem: `shortbyteschar` | `bytesescapeseq`
@@ -446,6 +446,10 @@ or ``'R'``; such strings are called :dfn:`raw strings` and treat backslashes as
  literal characters.  As a result, in string literals, ``'\U'`` and ``'\u'``
  escapes in raw strings are not treated specially.
  
+   .. versionadded:: 3.3
+      The ``'rb'`` prefix of raw bytes literals has been added as a synonym
+      of ``'br'``.
+
  In triple-quoted strings, unescaped newlines and quotes are allowed (and are
  retained), except that three unescaped quotes in a row terminate the string.  (A
  "quote" is the character used to open the string, i.e. either ``'`` or ``"``.)
diff --git a/Lib/test/test_strlit.py b/Lib/test/test_strlit.py

index dbf86524aedb6e5e7c17bce2d12467504fc0361d..a6033a4b57a325fcd5a0f59f1b4de302a74ac1f5 100644 (file)
--- a/Lib/test/test_strlit.py
+++ b/Lib/test/test_strlit.py
@@ -2,10 +2,10 @@ r"""Test correct treatment of various string literals by the parser.
  
  There are four types of string literals:
  
-    'abc'   -- normal str
-    r'abc'  -- raw str
-    b'xyz'  -- normal bytes
-    br'xyz' -- raw bytes
+    'abc'             -- normal str
+    r'abc'            -- raw str
+    b'xyz'            -- normal bytes
+    br'xyz' | rb'xyz' -- raw bytes
  
  The difference between normal and raw strings is of course that in a
  raw string, \ escapes (while still used to determine the end of the
@@ -103,12 +103,25 @@ class TestLiterals(unittest.TestCase):
  
      def test_eval_bytes_raw(self):
          self.assertEqual(eval(""" br'x' """), b'x')
+        self.assertEqual(eval(""" rb'x' """), b'x')
          self.assertEqual(eval(r""" br'\x01' """), b'\\' + b'x01')
+        self.assertEqual(eval(r""" rb'\x01' """), b'\\' + b'x01')
          self.assertEqual(eval(""" br'\x01' """), byte(1))
+        self.assertEqual(eval(""" rb'\x01' """), byte(1))
          self.assertEqual(eval(r""" br'\x81' """), b"\\" + b"x81")
+        self.assertEqual(eval(r""" rb'\x81' """), b"\\" + b"x81")
          self.assertRaises(SyntaxError, eval, """ br'\x81' """)
+        self.assertRaises(SyntaxError, eval, """ rb'\x81' """)
          self.assertEqual(eval(r""" br'\u1881' """), b"\\" + b"u1881")
+        self.assertEqual(eval(r""" rb'\u1881' """), b"\\" + b"u1881")
          self.assertRaises(SyntaxError, eval, """ br'\u1881' """)
+        self.assertRaises(SyntaxError, eval, """ rb'\u1881' """)
+        self.assertRaises(SyntaxError, eval, """ bb'' """)
+        self.assertRaises(SyntaxError, eval, """ rr'' """)
+        self.assertRaises(SyntaxError, eval, """ brr'' """)
+        self.assertRaises(SyntaxError, eval, """ bbr'' """)
+        self.assertRaises(SyntaxError, eval, """ rrb'' """)
+        self.assertRaises(SyntaxError, eval, """ rbb'' """)
  
      def check_encoding(self, encoding, extra=""):
          modname = "xx_" + encoding.replace("-", "_")
diff --git a/Lib/test/tokenize_tests.txt b/Lib/test/tokenize_tests.txt

index 06c83b0a8482f9247136e3b0fc4f099e329088c1..2c5fb1057657aae835367892c6bdbfd39931746b 100644 (file)
--- a/Lib/test/tokenize_tests.txt
+++ b/Lib/test/tokenize_tests.txt
@@ -114,8 +114,12 @@ x = b'abc' + B'ABC'
  y = b"abc" + B"ABC"
  x = br'abc' + Br'ABC' + bR'ABC' + BR'ABC'
  y = br"abc" + Br"ABC" + bR"ABC" + BR"ABC"
+x = rb'abc' + rB'ABC' + Rb'ABC' + RB'ABC'
+y = rb"abc" + rB"ABC" + Rb"ABC" + RB"ABC"
  x = br'\\' + BR'\\'
+x = rb'\\' + RB'\\'
  x = br'\'' + ''
+x = rb'\'' + ''
  y = br'''
  foo bar \\
  baz''' + BR'''
@@ -124,6 +128,10 @@ y = Br"""foo
  bar \\ baz
  """ + bR'''spam
  '''
+y = rB"""foo
+bar \\ baz
+""" + Rb'''spam
+'''
  
  # Indentation
  if 1:
diff --git a/Misc/NEWS b/Misc/NEWS

index 4cfccaede0836ca20b914327df3b52ec97d9c6fe..d930f158fd937194f6f7d13cee8032c410af74ef 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@ What's New in Python 3.3 Alpha 1?
  Core and Builtins
  -----------------
  
+- Issue #13748: Raw bytes literals can now be written with the ``rb`` prefix
+  as well as ``br``.
+
  - Issue #12736: Use full unicode case mappings for upper, lower, and title case.
  
  - Issue #12760: Add a create mode to open(). Patch by David Townshend.
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c

index c3b2f35db5ee78ef40bec8958d9e031118ba901f..55f431323cf83920d7f039f9d4922c31e3ea9379 100644 (file)
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -1412,13 +1412,15 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
      /* Identifier (most frequent token!) */
      nonascii = 0;
      if (is_potential_identifier_start(c)) {
-        /* Process b"", r"" and br"" */
-        if (c == 'b' || c == 'B') {
-            c = tok_nextc(tok);
-            if (c == '"' || c == '\'')
-                goto letter_quote;
-        }
-        if (c == 'r' || c == 'R') {
+        /* Process b"", r"", br"" and rb"" */
+        int saw_b = 0, saw_r = 0;
+        while (1) {
+            if (!saw_b && (c == 'b' || c == 'B'))
+                saw_b = 1;
+            else if (!saw_r && (c == 'r' || c == 'R'))
+                saw_r = 1;
+            else
+                break;
              c = tok_nextc(tok);
              if (c == '"' || c == '\'')
                  goto letter_quote;
diff --git a/Python/ast.c b/Python/ast.c

index 48aef4815db2ff76a5fb638ec15f1fe7ea50f3bd..110754bfd47047514b17957672d0d33225e71978 100644 (file)
--- a/Python/ast.c
+++ b/Python/ast.c
@@ -3744,13 +3744,18 @@ parsestr(struct compiling *c, const node *n, int *bytesmode)
      int rawmode = 0;
      int need_encoding;
      if (isalpha(quote)) {
-        if (quote == 'b' || quote == 'B') {
-            quote = *++s;
-            *bytesmode = 1;
-        }
-        if (quote == 'r' || quote == 'R') {
-            quote = *++s;
-            rawmode = 1;
+        while (!*bytesmode || !rawmode) {
+            if (quote == 'b' || quote == 'B') {
+                quote = *++s;
+                *bytesmode = 1;
+            }
+            else if (quote == 'r' || quote == 'R') {
+                quote = *++s;
+                rawmode = 1;
+            }
+            else {
+                break;
+            }
          }
      }
      if (quote != '\'' && quote != '\"') {
author	Antoine Pitrou <solipsis@pitrou.net>
	Thu, 12 Jan 2012 21:46:19 +0000 (22:46 +0100)
committer	Antoine Pitrou <solipsis@pitrou.net>
	Thu, 12 Jan 2012 21:46:19 +0000 (22:46 +0100)
Doc/reference/lexical_analysis.rst		patch \| blob \| history
Lib/test/test_strlit.py		patch \| blob \| history
Lib/test/tokenize_tests.txt		patch \| blob \| history
Misc/NEWS		patch \| blob \| history
Parser/tokenizer.c		patch \| blob \| history
Python/ast.c		patch \| blob \| history