[3.6] bpo-29104: Fixed parsing backslashes in f-strings. (GH-490) (#1812)

author Serhiy Storchaka <storchaka@gmail.com>

Thu, 25 May 2017 11:18:55 +0000 (14:18 +0300)

committer GitHub <noreply@github.com>

Thu, 25 May 2017 11:18:55 +0000 (14:18 +0300)
author Serhiy Storchaka <storchaka@gmail.com>
Thu, 25 May 2017 11:18:55 +0000 (14:18 +0300)
committer GitHub <noreply@github.com>
Thu, 25 May 2017 11:18:55 +0000 (14:18 +0300)
diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py

index 708ed25b526b743c10e12cfc4dba98ef8cac26d0..25730029ae76f17ee9f4543b98fcbbfcc6ab8214 100644 (file)
--- a/Lib/test/test_fstring.py
+++ b/Lib/test/test_fstring.py
@@ -361,6 +361,20 @@ f'{a * x()}'"""
          self.assertEqual(f'2\x203', '2 3')
          self.assertEqual(f'\x203', ' 3')
  
+        with self.assertWarns(DeprecationWarning):  # invalid escape sequence
+            value = eval(r"f'\{6*7}'")
+        self.assertEqual(value, '\\42')
+        self.assertEqual(f'\\{6*7}', '\\42')
+        self.assertEqual(fr'\{6*7}', '\\42')
+
+        AMPERSAND = 'spam'
+        # Get the right unicode character (&), or pick up local variable
+        # depending on the number of backslashes.
+        self.assertEqual(f'\N{AMPERSAND}', '&')
+        self.assertEqual(f'\\N{AMPERSAND}', '\\Nspam')
+        self.assertEqual(fr'\N{AMPERSAND}', '\\Nspam')
+        self.assertEqual(f'\\\N{AMPERSAND}', '\\&')
+
      def test_misformed_unicode_character_name(self):
          # These test are needed because unicode names are parsed
          # differently inside f-strings.
diff --git a/Misc/NEWS b/Misc/NEWS

index 0dda4fa512294836660318f7363ffe9b40f22c1b..09eb4a76df032f3e86bed032f08e03fbef26e372 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,8 @@ What's New in Python 3.6.2 release candidate 1?
  Core and Builtins
  -----------------
  
+- bpo-29104: Fixed parsing backslashes in f-strings.
+
  - bpo-27945: Fixed various segfaults with dict when input collections are
    mutated during searching, inserting or comparing.  Based on patches by
    Duane Griffin and Tim Mitchell.
diff --git a/Python/ast.c b/Python/ast.c

index 217ea14bf310be55432bfdd7978e1e58b631622c..ed05a1e53bdb5e3585ba5fd27436479a42b08dec 100644 (file)
--- a/Python/ast.c
+++ b/Python/ast.c
@@ -4170,9 +4170,11 @@ decode_unicode_with_escapes(struct compiling *c, const node *n, const char *s,
      while (s < end) {
          if (*s == '\\') {
              *p++ = *s++;
-            if (*s & 0x80) {
+            if (s >= end || *s & 0x80) {
                  strcpy(p, "u005c");
                  p += 5;
+                if (s >= end)
+                    break;
              }
          }
          if (*s & 0x80) { /* XXX inefficient */
@@ -4325,30 +4327,37 @@ fstring_find_literal(const char **str, const char *end, int raw,
         brace (which isn't part of a unicode name escape such as
         "\N{EULER CONSTANT}"), or the end of the string. */
  
-    const char *literal_start = *str;
-    const char *literal_end;
-    int in_named_escape = 0;
+    const char *s = *str;
+    const char *literal_start = s;
      int result = 0;
  
      assert(*literal == NULL);
-    for (; *str < end; (*str)++) {
-        char ch = **str;
-        if (!in_named_escape && ch == '{' && (*str)-literal_start >= 2 &&
-            *(*str-2) == '\\' && *(*str-1) == 'N') {
-            in_named_escape = 1;
-        } else if (in_named_escape && ch == '}') {
-            in_named_escape = 0;
-        } else if (ch == '{' || ch == '}') {
+    while (s < end) {
+        char ch = *s++;
+        if (!raw && ch == '\\' && s < end) {
+            ch = *s++;
+            if (ch == 'N') {
+                if (s < end && *s++ == '{') {
+                    while (s < end && *s++ != '}') {
+                    }
+                    continue;
+                }
+                break;
+            }
+            if (ch == '{' && warn_invalid_escape_sequence(c, n, ch) < 0) {
+                return -1;
+            }
+        }
+        if (ch == '{' || ch == '}') {
              /* Check for doubled braces, but only at the top level. If
                 we checked at every level, then f'{0:{3}}' would fail
                 with the two closing braces. */
              if (recurse_lvl == 0) {
-                if (*str+1 < end && *(*str+1) == ch) {
+                if (s < end && *s == ch) {
                      /* We're going to tell the caller that the literal ends
                         here, but that they should continue scanning. But also
                         skip over the second brace when we resume scanning. */
-                    literal_end = *str+1;
-                    *str += 2;
+                    *str = s + 1;
                      result = 1;
                      goto done;
                  }
@@ -4356,6 +4365,7 @@ fstring_find_literal(const char **str, const char *end, int raw,
                  /* Where a single '{' is the start of a new expression, a
                     single '}' is not allowed. */
                  if (ch == '}') {
+                    *str = s - 1;
                      ast_error(c, n, "f-string: single '}' is not allowed");
                      return -1;
                  }
@@ -4363,21 +4373,22 @@ fstring_find_literal(const char **str, const char *end, int raw,
              /* We're either at a '{', which means we're starting another
                 expression; or a '}', which means we're at the end of this
                 f-string (for a nested format_spec). */
+            s--;
              break;
          }
      }
-    literal_end = *str;
-    assert(*str <= end);
-    assert(*str == end || **str == '{' || **str == '}');
+    *str = s;
+    assert(s <= end);
+    assert(s == end || *s == '{' || *s == '}');
  done:
-    if (literal_start != literal_end) {
+    if (literal_start != s) {
          if (raw)
              *literal = PyUnicode_DecodeUTF8Stateful(literal_start,
-                                                    literal_end-literal_start,
+                                                    s - literal_start,
                                                      NULL, NULL);
          else
              *literal = decode_unicode_with_escapes(c, n, literal_start,
-                                                   literal_end-literal_start);
+                                                   s - literal_start);
          if (!*literal)
              return -1;
      }
author	Serhiy Storchaka <storchaka@gmail.com>
	Thu, 25 May 2017 11:18:55 +0000 (14:18 +0300)
committer	GitHub <noreply@github.com>
	Thu, 25 May 2017 11:18:55 +0000 (14:18 +0300)
Lib/test/test_fstring.py		patch \| blob \| history
Misc/NEWS		patch \| blob \| history
Python/ast.c		patch \| blob \| history