bpo-37050: Remove expr_text from FormattedValue ast node, use Constant node instead...

author Eric V. Smith <ericvsmith@users.noreply.github.com>

Mon, 27 May 2019 19:31:52 +0000 (15:31 -0400)

committer GitHub <noreply@github.com>

Mon, 27 May 2019 19:31:52 +0000 (15:31 -0400)
author Eric V. Smith <ericvsmith@users.noreply.github.com>
Mon, 27 May 2019 19:31:52 +0000 (15:31 -0400)
committer GitHub <noreply@github.com>
Mon, 27 May 2019 19:31:52 +0000 (15:31 -0400)
diff --git a/Include/Python-ast.h b/Include/Python-ast.h

index 2fc50e3f53a21f85f63c9873ef59bbd77e4cfcd7..490d3b0846abe748f118f87756aed3e36327820e 100644 (file)
--- a/Include/Python-ast.h
+++ b/Include/Python-ast.h
@@ -330,7 +330,6 @@ struct _expr {
              expr_ty value;
              int conversion;
              expr_ty format_spec;
-            string expr_text;
          } FormattedValue;
  
          struct {
@@ -639,10 +638,10 @@ expr_ty _Py_Compare(expr_ty left, asdl_int_seq * ops, asdl_seq * comparators,
  expr_ty _Py_Call(expr_ty func, asdl_seq * args, asdl_seq * keywords, int
                   lineno, int col_offset, int end_lineno, int end_col_offset,
                   PyArena *arena);
-#define FormattedValue(a0, a1, a2, a3, a4, a5, a6, a7, a8) _Py_FormattedValue(a0, a1, a2, a3, a4, a5, a6, a7, a8)
+#define FormattedValue(a0, a1, a2, a3, a4, a5, a6, a7) _Py_FormattedValue(a0, a1, a2, a3, a4, a5, a6, a7)
  expr_ty _Py_FormattedValue(expr_ty value, int conversion, expr_ty format_spec,
-                           string expr_text, int lineno, int col_offset, int
-                           end_lineno, int end_col_offset, PyArena *arena);
+                           int lineno, int col_offset, int end_lineno, int
+                           end_col_offset, PyArena *arena);
  #define JoinedStr(a0, a1, a2, a3, a4, a5) _Py_JoinedStr(a0, a1, a2, a3, a4, a5)
  expr_ty _Py_JoinedStr(asdl_seq * values, int lineno, int col_offset, int
                        end_lineno, int end_col_offset, PyArena *arena);
diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py

index 3484fcecf1c5e2b00d58f5209afcc3e4faa04f70..c9e6e7de5afdbf7482ed3b9b45ab7258d0ab0a98 100644 (file)
--- a/Lib/test/test_fstring.py
+++ b/Lib/test/test_fstring.py
@@ -1150,6 +1150,24 @@ non-important content
  
          self.assertRaises(SyntaxError, eval, "f'{C=]'")
  
+        # Make sure leading and following text works.
+        x = 'foo'
+        self.assertEqual(f'X{x=}Y', 'Xx='+repr(x)+'Y')
+
+        # Make sure whitespace around the = works.
+        self.assertEqual(f'X{x  =}Y', 'Xx  ='+repr(x)+'Y')
+        self.assertEqual(f'X{x=  }Y', 'Xx=  '+repr(x)+'Y')
+        self.assertEqual(f'X{x  =  }Y', 'Xx  =  '+repr(x)+'Y')
+
+        # These next lines contains tabs.  Backslash escapes don't
+        # work in f-strings.
+        # patchcheck doens't like these tabs.  So the only way to test
+        # this will be to dynamically created and exec the f-strings.  But
+        # that's such a hassle I'll save it for another day.  For now, convert
+        # the tabs to spaces just to shut up patchcheck.
+        #self.assertEqual(f'X{x =}Y', 'Xx\t='+repr(x)+'Y')
+        #self.assertEqual(f'X{x =       }Y', 'Xx\t=\t'+repr(x)+'Y')
+
      def test_walrus(self):
          x = 20
          # This isn't an assignment expression, it's 'x', with a format
diff --git a/Lib/test/test_future.py b/Lib/test/test_future.py

index dd148b62956298d49eb876ba638f9614aa9769f8..303c5f7fbed605ebaa867c9b30a70a00b25978ca 100644 (file)
--- a/Lib/test/test_future.py
+++ b/Lib/test/test_future.py
@@ -270,12 +270,6 @@ class AnnotationsFutureTestCase(unittest.TestCase):
          eq("f'{x}'")
          eq("f'{x!r}'")
          eq("f'{x!a}'")
-        eq("f'{x=!r}'")
-        eq("f'{x=:}'")
-        eq("f'{x=:.2f}'")
-        eq("f'{x=!r}'")
-        eq("f'{x=!a}'")
-        eq("f'{x=!s:*^20}'")
          eq('(yield from outside_of_generator)')
          eq('(yield)')
          eq('(yield a + b)')
@@ -290,6 +284,15 @@ class AnnotationsFutureTestCase(unittest.TestCase):
          eq("(x:=10)")
          eq("f'{(x:=10):=10}'")
  
+        # f-strings with '=' don't round trip very well, so set the expected
+        # result explicitely.
+        self.assertAnnotationEqual("f'{x=!r}'", expected="f'x={x!r}'")
+        self.assertAnnotationEqual("f'{x=:}'", expected="f'x={x:}'")
+        self.assertAnnotationEqual("f'{x=:.2f}'", expected="f'x={x:.2f}'")
+        self.assertAnnotationEqual("f'{x=!r}'", expected="f'x={x!r}'")
+        self.assertAnnotationEqual("f'{x=!a}'", expected="f'x={x!a}'")
+        self.assertAnnotationEqual("f'{x=!s:*^20}'", expected="f'x={x!s:*^20}'")
+
  
  if __name__ == "__main__":
      unittest.main()
diff --git a/Misc/NEWS.d/next/Core and Builtins/2019-05-27-14-46-24.bpo-37050.7MyZGg.rst b/Misc/NEWS.d/next/Core and Builtins/2019-05-27-14-46-24.bpo-37050.7MyZGg.rst

new file mode 100644 (file)

index 0000000..0667c8e
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2019-05-27-14-46-24.bpo-37050.7MyZGg.rst
@@ -0,0 +1,4 @@
+Improve the AST for "debug" f-strings, which use '=' to print out the source
+of the expression being evaluated.  Delete expr_text from the FormattedValue
+node, and instead use a Constant string node (possibly merged with adjacent
+constant expressions inside the f-string).
diff --git a/Parser/Python.asdl b/Parser/Python.asdl

index 882f5d1eba35bfeb2736173683fe6801ac5922e0..0c00d398b4610eab830c07c133cd8c77266e1eb1 100644 (file)
--- a/Parser/Python.asdl
+++ b/Parser/Python.asdl
@@ -76,7 +76,7 @@ module Python
           -- x < 4 < 3 and (x < 4) < 3
           | Compare(expr left, cmpop* ops, expr* comparators)
           | Call(expr func, expr* args, keyword* keywords)
-         | FormattedValue(expr value, int? conversion, expr? format_spec, string? expr_text)
+         | FormattedValue(expr value, int? conversion, expr? format_spec)
           | JoinedStr(expr* values)
           | Constant(constant value, string? kind)
  
diff --git a/Python/Python-ast.c b/Python/Python-ast.c

index 39a40eedca3267395c1748fded57c5cf06175963..7c8e438658f7fa66c8940224da522cff9379be91 100644 (file)
--- a/Python/Python-ast.c
+++ b/Python/Python-ast.c
@@ -314,12 +314,10 @@ static char *Call_fields[]={
  static PyTypeObject *FormattedValue_type;
  _Py_IDENTIFIER(conversion);
  _Py_IDENTIFIER(format_spec);
-_Py_IDENTIFIER(expr_text);
  static char *FormattedValue_fields[]={
      "value",
      "conversion",
      "format_spec",
-    "expr_text",
  };
  static PyTypeObject *JoinedStr_type;
  static char *JoinedStr_fields[]={
@@ -954,7 +952,7 @@ static int init_types(void)
      Call_type = make_type("Call", expr_type, Call_fields, 3);
      if (!Call_type) return 0;
      FormattedValue_type = make_type("FormattedValue", expr_type,
-                                    FormattedValue_fields, 4);
+                                    FormattedValue_fields, 3);
      if (!FormattedValue_type) return 0;
      JoinedStr_type = make_type("JoinedStr", expr_type, JoinedStr_fields, 1);
      if (!JoinedStr_type) return 0;
@@ -2253,9 +2251,9 @@ Call(expr_ty func, asdl_seq * args, asdl_seq * keywords, int lineno, int
  }
  
  expr_ty
-FormattedValue(expr_ty value, int conversion, expr_ty format_spec, string
-               expr_text, int lineno, int col_offset, int end_lineno, int
-               end_col_offset, PyArena *arena)
+FormattedValue(expr_ty value, int conversion, expr_ty format_spec, int lineno,
+               int col_offset, int end_lineno, int end_col_offset, PyArena
+               *arena)
  {
      expr_ty p;
      if (!value) {
@@ -2270,7 +2268,6 @@ FormattedValue(expr_ty value, int conversion, expr_ty format_spec, string
      p->v.FormattedValue.value = value;
      p->v.FormattedValue.conversion = conversion;
      p->v.FormattedValue.format_spec = format_spec;
-    p->v.FormattedValue.expr_text = expr_text;
      p->lineno = lineno;
      p->col_offset = col_offset;
      p->end_lineno = end_lineno;
@@ -3507,11 +3504,6 @@ ast2obj_expr(void* _o)
          if (_PyObject_SetAttrId(result, &PyId_format_spec, value) == -1)
              goto failed;
          Py_DECREF(value);
-        value = ast2obj_string(o->v.FormattedValue.expr_text);
-        if (!value) goto failed;
-        if (_PyObject_SetAttrId(result, &PyId_expr_text, value) == -1)
-            goto failed;
-        Py_DECREF(value);
          break;
      case JoinedStr_kind:
          result = PyType_GenericNew(JoinedStr_type, NULL, NULL);
@@ -7169,7 +7161,6 @@ obj2ast_expr(PyObject* obj, expr_ty* out, PyArena* arena)
          expr_ty value;
          int conversion;
          expr_ty format_spec;
-        string expr_text;
  
          if (_PyObject_LookupAttrId(obj, &PyId_value, &tmp) < 0) {
              return 1;
@@ -7210,22 +7201,8 @@ obj2ast_expr(PyObject* obj, expr_ty* out, PyArena* arena)
              if (res != 0) goto failed;
              Py_CLEAR(tmp);
          }
-        if (_PyObject_LookupAttrId(obj, &PyId_expr_text, &tmp) < 0) {
-            return 1;
-        }
-        if (tmp == NULL || tmp == Py_None) {
-            Py_CLEAR(tmp);
-            expr_text = NULL;
-        }
-        else {
-            int res;
-            res = obj2ast_string(tmp, &expr_text, arena);
-            if (res != 0) goto failed;
-            Py_CLEAR(tmp);
-        }
-        *out = FormattedValue(value, conversion, format_spec, expr_text,
-                              lineno, col_offset, end_lineno, end_col_offset,
-                              arena);
+        *out = FormattedValue(value, conversion, format_spec, lineno,
+                              col_offset, end_lineno, end_col_offset, arena);
          if (*out == NULL) goto failed;
          return 0;
      }
diff --git a/Python/ast.c b/Python/ast.c

index 625982735775b8b562ce479173f7aae72a63151d..7ffdf4a2a03709225e405b22340cdf574bcda190 100644 (file)
--- a/Python/ast.c
+++ b/Python/ast.c
@@ -5006,10 +5006,16 @@ fstring_parse(const char **str, const char *end, int raw, int recurse_lvl,
     closing brace doesn't match an opening paren, for example. It
     doesn't need to error on all invalid expressions, just correctly
     find the end of all valid ones. Any errors inside the expression
-   will be caught when we parse it later. */
+   will be caught when we parse it later.
+
+   *expression is set to the expression.  For an '=' "debug" expression,
+   *expr_text is set to the debug text (the original text of the expression,
+   *including the '=' and any whitespace around it, as a string object).  If
+   *not a debug expression, *expr_text set to NULL. */
  static int
  fstring_find_expr(const char **str, const char *end, int raw, int recurse_lvl,
-                  expr_ty *expression, struct compiling *c, const node *n)
+                  PyObject **expr_text, expr_ty *expression,
+                  struct compiling *c, const node *n)
  {
      /* Return -1 on error, else 0. */
  
@@ -5020,9 +5026,6 @@ fstring_find_expr(const char **str, const char *end, int raw, int recurse_lvl,
      int conversion = -1; /* The conversion char.  Use default if not
                              specified, or !r if using = and no format
                              spec. */
-    int equal_flag = 0; /* Are we using the = feature? */
-    PyObject *expr_text = NULL; /* The text of the expression, used for =. */
-    const char *expr_text_end;
  
      /* 0 if we're not in a string, else the quote char we're trying to
         match (single or double quote). */
@@ -5198,7 +5201,6 @@ fstring_find_expr(const char **str, const char *end, int raw, int recurse_lvl,
         expr_text. */
      if (**str == '=') {
          *str += 1;
-        equal_flag = 1;
  
          /* Skip over ASCII whitespace.  No need to test for end of string
             here, since we know there's at least a trailing quote somewhere
@@ -5206,7 +5208,14 @@ fstring_find_expr(const char **str, const char *end, int raw, int recurse_lvl,
          while (Py_ISSPACE(**str)) {
              *str += 1;
          }
-        expr_text_end = *str;
+
+        /* Set *expr_text to the text of the expression. */
+        *expr_text = PyUnicode_FromStringAndSize(expr_start, *str-expr_start);
+        if (!*expr_text) {
+            goto error;
+        }
+    } else {
+        *expr_text = NULL;
      }
  
      /* Check for a conversion char, if present. */
@@ -5227,17 +5236,6 @@ fstring_find_expr(const char **str, const char *end, int raw, int recurse_lvl,
          }
  
      }
-    if (equal_flag) {
-        Py_ssize_t len = expr_text_end - expr_start;
-        expr_text = PyUnicode_FromStringAndSize(expr_start, len);
-        if (!expr_text) {
-            goto error;
-        }
-        if (PyArena_AddPyObject(c->c_arena, expr_text) < 0) {
-            Py_DECREF(expr_text);
-            goto error;
-        }
-    }
  
      /* Check for the format spec, if present. */
      if (*str >= end)
@@ -5261,16 +5259,16 @@ fstring_find_expr(const char **str, const char *end, int raw, int recurse_lvl,
      assert(**str == '}');
      *str += 1;
  
-    /* If we're in = mode, and have no format spec and no explict conversion,
-       set the conversion to 'r'. */
-    if (equal_flag && format_spec == NULL && conversion == -1) {
+    /* If we're in = mode (detected by non-NULL expr_text), and have no format
+       spec and no explict conversion, set the conversion to 'r'. */
+    if (*expr_text && format_spec == NULL && conversion == -1) {
          conversion = 'r';
      }
  
      /* And now create the FormattedValue node that represents this
         entire expression with the conversion and format spec. */
      *expression = FormattedValue(simple_expression, conversion,
-                                 format_spec, expr_text, LINENO(n),
+                                 format_spec, LINENO(n),
                                   n->n_col_offset, n->n_end_lineno,
                                   n->n_end_col_offset, c->c_arena);
      if (!*expression)
@@ -5313,7 +5311,7 @@ error:
  static int
  fstring_find_literal_and_expr(const char **str, const char *end, int raw,
                                int recurse_lvl, PyObject **literal,
-                              expr_ty *expression,
+                              PyObject **expr_text, expr_ty *expression,
                                struct compiling *c, const node *n)
  {
      int result;
@@ -5341,7 +5339,8 @@ fstring_find_literal_and_expr(const char **str, const char *end, int raw,
      /* We must now be the start of an expression, on a '{'. */
      assert(**str == '{');
  
-    if (fstring_find_expr(str, end, raw, recurse_lvl, expression, c, n) < 0)
+    if (fstring_find_expr(str, end, raw, recurse_lvl, expr_text,
+                          expression, c, n) < 0)
          goto error;
  
      return 0;
@@ -5604,7 +5603,7 @@ FstringParser_ConcatFstring(FstringParser *state, const char **str,
  
      /* Parse the f-string. */
      while (1) {
-        PyObject *literal = NULL;
+        PyObject *literal[2] = {NULL, NULL};
          expr_ty expression = NULL;
  
          /* If there's a zero length literal in front of the
@@ -5612,31 +5611,34 @@ FstringParser_ConcatFstring(FstringParser *state, const char **str,
             the f-string, expression will be NULL (unless result == 1,
             see below). */
          int result = fstring_find_literal_and_expr(str, end, raw, recurse_lvl,
-                                                   &literal, &expression,
-                                                   c, n);
+                                                   &literal[0], &literal[1],
+                                                   &expression, c, n);
          if (result < 0)
              return -1;
  
-        /* Add the literal, if any. */
-        if (!literal) {
-            /* Do nothing. Just leave last_str alone (and possibly
-               NULL). */
-        } else if (!state->last_str) {
-            /*  Note that the literal can be zero length, if the
-                input string is "\\\n" or "\\\r", among others. */
-            state->last_str = literal;
-            literal = NULL;
-        } else {
-            /* We have a literal, concatenate it. */
-            assert(PyUnicode_GET_LENGTH(literal) != 0);
-            if (FstringParser_ConcatAndDel(state, literal) < 0)
-                return -1;
-            literal = NULL;
+        /* Add the literals, if any. */
+        for (int i = 0; i < 2; i++) {
+            if (!literal[i]) {
+                /* Do nothing. Just leave last_str alone (and possibly
+                   NULL). */
+            } else if (!state->last_str) {
+                /*  Note that the literal can be zero length, if the
+                    input string is "\\\n" or "\\\r", among others. */
+                state->last_str = literal[i];
+                literal[i] = NULL;
+            } else {
+                /* We have a literal, concatenate it. */
+                assert(PyUnicode_GET_LENGTH(literal[i]) != 0);
+                if (FstringParser_ConcatAndDel(state, literal[i]) < 0)
+                    return -1;
+                literal[i] = NULL;
+            }
          }
  
-        /* We've dealt with the literal now. It can't be leaked on further
+        /* We've dealt with the literals now. They can't be leaked on further
             errors. */
-        assert(literal == NULL);
+        assert(literal[0] == NULL);
+        assert(literal[1] == NULL);
  
          /* See if we should just loop around to get the next literal
             and expression, while ignoring the expression this
diff --git a/Python/ast_unparse.c b/Python/ast_unparse.c

index f1b991a7c387885df6f77240a693c9eb0da38827..f376e86ddc4c0d87af9df3703d17c51851fe69b6 100644 (file)
--- a/Python/ast_unparse.c
+++ b/Python/ast_unparse.c
@@ -665,11 +665,6 @@ append_formattedvalue(_PyUnicodeWriter *writer, expr_ty e, bool is_format_spec)
      }
      Py_DECREF(temp_fv_str);
  
-    if (e->v.FormattedValue.expr_text) {
-        /* Use the = for debug text expansion. */
-        APPEND_STR("=");
-    }
-
      if (e->v.FormattedValue.conversion > 0) {
          switch (e->v.FormattedValue.conversion) {
          case 'a':
diff --git a/Python/compile.c b/Python/compile.c

index 425d0d68ac4adc379af72293bc1463cc05fd01b1..f1c97bdfe47f07e77d4e13e15f17dd2e4fa3e0dc 100644 (file)
--- a/Python/compile.c
+++ b/Python/compile.c
@@ -3963,12 +3963,6 @@ compiler_formatted_value(struct compiler *c, expr_ty e)
      int conversion = e->v.FormattedValue.conversion;
      int oparg;
  
-    if (e->v.FormattedValue.expr_text) {
-        /* Push the text of the expression (which already has the '=' in
-           it. */
-        ADDOP_LOAD_CONST(c, e->v.FormattedValue.expr_text);
-    }
-
      /* The expression to be formatted. */
      VISIT(c, expr, e->v.FormattedValue.value);
  
@@ -3991,11 +3985,6 @@ compiler_formatted_value(struct compiler *c, expr_ty e)
      /* And push our opcode and oparg */
      ADDOP_I(c, FORMAT_VALUE, oparg);
  
-    /* If we have expr_text, join the 2 strings on the stack. */
-    if (e->v.FormattedValue.expr_text) {
-        ADDOP_I(c, BUILD_STRING, 2);
-    }
-
      return 1;
  }
author	Eric V. Smith <ericvsmith@users.noreply.github.com>
	Mon, 27 May 2019 19:31:52 +0000 (15:31 -0400)
committer	GitHub <noreply@github.com>
	Mon, 27 May 2019 19:31:52 +0000 (15:31 -0400)
Include/Python-ast.h		patch \| blob \| history
Lib/test/test_fstring.py		patch \| blob \| history
Lib/test/test_future.py		patch \| blob \| history
Misc/NEWS.d/next/Core and Builtins/2019-05-27-14-46-24.bpo-37050.7MyZGg.rst	[new file with mode: 0644]	patch \| blob
Parser/Python.asdl		patch \| blob \| history
Python/Python-ast.c		patch \| blob \| history
Python/ast.c		patch \| blob \| history
Python/ast_unparse.c		patch \| blob \| history
Python/compile.c		patch \| blob \| history