Issue #20440: More use of Py_SETREF.

author Serhiy Storchaka <storchaka@gmail.com>

Sun, 27 Dec 2015 13:44:33 +0000 (15:44 +0200)

committer Serhiy Storchaka <storchaka@gmail.com>

Sun, 27 Dec 2015 13:44:33 +0000 (15:44 +0200)
author Serhiy Storchaka <storchaka@gmail.com>
Sun, 27 Dec 2015 13:44:33 +0000 (15:44 +0200)
committer Serhiy Storchaka <storchaka@gmail.com>
Sun, 27 Dec 2015 13:44:33 +0000 (15:44 +0200)
diff --cc Modules/_ctypes/_ctypes.c
Simple merge
diff --cc Modules/_elementtree.c

index f69ce2976667aabd44b11d7420fcc58353291772,11d1aece4fa00163518b1516d7cb43626f45820f..f16d48f829f1cbe744efca9e27bf582c9d5b91ba
--- 1/Modules/_elementtree.c
--- 2/Modules/_elementtree.c
+++ b/Modules/_elementtree.c
@@@ -1911,92 -1879,92 +1909,90 @@@ element_ass_subscr(PyObject* self_, PyO
   }
   
   static PyObject*
- -element_getattro(ElementObject* self, PyObject* nameobj)
+ +element_tag_getter(ElementObject *self, void *closure)
   {
- -    PyObject* res;
- -    char *name = "";
+ +    PyObject *res = self->tag;
+ +    Py_INCREF(res);
+ +    return res;
+ +}
   
- -    if (PyUnicode_Check(nameobj))
- -        name = _PyUnicode_AsString(nameobj);
+ +static PyObject*
+ +element_text_getter(ElementObject *self, void *closure)
+ +{
+ +    PyObject *res = element_get_text(self);
+ +    Py_XINCREF(res);
+ +    return res;
+ +}
   
- -    if (name == NULL)
- -        return NULL;
+ +static PyObject*
+ +element_tail_getter(ElementObject *self, void *closure)
+ +{
+ +    PyObject *res = element_get_tail(self);
+ +    Py_XINCREF(res);
+ +    return res;
+ +}
   
- -    /* handle common attributes first */
- -    if (strcmp(name, "tag") == 0) {
- -        res = self->tag;
- -        Py_INCREF(res);
- -        return res;
- -    } else if (strcmp(name, "text") == 0) {
- -        res = element_get_text(self);
- -        Py_XINCREF(res);
- -        return res;
+ +static PyObject*
+ +element_attrib_getter(ElementObject *self, void *closure)
+ +{
+ +    PyObject *res;
+ +    if (!self->extra) {
+ +        if (create_extra(self, NULL) < 0)
+ +            return NULL;
       }
+ +    res = element_get_attrib(self);
+ +    Py_XINCREF(res);
+ +    return res;
+ +}
   
- -    /* methods */
- -    res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
- -    if (res)
- -        return res;
- -
- -    /* less common attributes */
- -    if (strcmp(name, "tail") == 0) {
- -        PyErr_Clear();
- -        res = element_get_tail(self);
- -    } else if (strcmp(name, "attrib") == 0) {
- -        PyErr_Clear();
- -        if (!self->extra) {
- -            if (create_extra(self, NULL) < 0)
- -                return NULL;
- -        }
- -        res = element_get_attrib(self);
+ +/* macro for setter validation */
+ +#define _VALIDATE_ATTR_VALUE(V)                     \
+ +    if ((V) == NULL) {                              \
+ +        PyErr_SetString(                            \
+ +            PyExc_AttributeError,                   \
+ +            "can't delete element attribute");      \
+ +        return -1;                                  \
       }
   
- -    if (!res)
- -        return NULL;
- -
- -    Py_INCREF(res);
- -    return res;
+ +static int
+ +element_tag_setter(ElementObject *self, PyObject *value, void *closure)
+ +{
+ +    _VALIDATE_ATTR_VALUE(value);
+ +    Py_INCREF(value);
-     Py_DECREF(self->tag);
-     self->tag = value;
++    Py_SETREF(self->tag, value);
+ +    return 0;
   }
   
   static int
- -element_setattro(ElementObject* self, PyObject* nameobj, PyObject* value)
+ +element_text_setter(ElementObject *self, PyObject *value, void *closure)
   {
- -    char *name = "";
+ +    _VALIDATE_ATTR_VALUE(value);
+ +    Py_INCREF(value);
+ +    Py_DECREF(JOIN_OBJ(self->text));
+ +    self->text = value;
+ +    return 0;
+ +}
   
- -    if (value == NULL) {
- -        PyErr_SetString(PyExc_AttributeError,
- -            "can't delete attribute");
- -        return -1;
- -    }
- -    if (PyUnicode_Check(nameobj))
- -        name = _PyUnicode_AsString(nameobj);
- -    if (name == NULL)
- -        return -1;
+ +static int
+ +element_tail_setter(ElementObject *self, PyObject *value, void *closure)
+ +{
+ +    _VALIDATE_ATTR_VALUE(value);
+ +    Py_INCREF(value);
+ +    Py_DECREF(JOIN_OBJ(self->tail));
+ +    self->tail = value;
+ +    return 0;
+ +}
   
- -    if (strcmp(name, "tag") == 0) {
- -        Py_INCREF(value);
- -        Py_SETREF(self->tag, value);
- -    } else if (strcmp(name, "text") == 0) {
- -        Py_DECREF(JOIN_OBJ(self->text));
- -        self->text = value;
- -        Py_INCREF(self->text);
- -    } else if (strcmp(name, "tail") == 0) {
- -        Py_DECREF(JOIN_OBJ(self->tail));
- -        self->tail = value;
- -        Py_INCREF(self->tail);
- -    } else if (strcmp(name, "attrib") == 0) {
- -        if (!self->extra) {
- -            if (create_extra(self, NULL) < 0)
- -                return -1;
- -        }
- -        Py_INCREF(value);
- -        Py_SETREF(self->extra->attrib, value);
- -    } else {
- -        PyErr_SetString(PyExc_AttributeError,
- -            "Can't set arbitrary attributes on Element");
- -        return -1;
+ +static int
+ +element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
+ +{
+ +    _VALIDATE_ATTR_VALUE(value);
+ +    if (!self->extra) {
+ +        if (create_extra(self, NULL) < 0)
+ +            return -1;
       }
- -
+ +    Py_INCREF(value);
-     Py_DECREF(self->extra->attrib);
-     self->extra->attrib = value;
++    Py_SETREF(self->extra->attrib, value);
       return 0;
   }
   
diff --cc Modules/_sqlite/cursor.c
Simple merge
diff --cc Modules/zlibmodule.c
Simple merge
diff --cc Objects/unicodeobject.c
Simple merge
diff --cc Python/ast.c

index 328ee5d9141815277f82da188f0045ba9bb213eb,7743c31c1fbdf7a35b6feb0457faba13316ab197..33f2597fc0e3988718134de7cfc8243aa01b632f
--- 1/Python/ast.c
--- 2/Python/ast.c
+++ b/Python/ast.c
@@@ -4002,873 -3999,12 +4002,869 @@@ decode_unicode(struct compiling *c, con
       return v;
   }
   
- -/* s is a Python string literal, including the bracketing quote characters,
- - * and r &/or b prefixes (if any), and embedded escape sequences (if any).
- - * parsestr parses it, and returns the decoded Python string object.
- - */
+ +/* Compile this expression into an expr_ty. We know that we can
+ +   temporarily modify the character before the start of this string
+ +   (it's '{'), and we know we can temporarily modify the character
+ +   after this string (it is a '}').  Leverage this to create a
+ +   sub-string with enough room for us to add parens around the
+ +   expression. This is to allow strings with embedded newlines, for
+ +   example. */
+ +static expr_ty
+ +fstring_compile_expr(PyObject *str, Py_ssize_t expr_start,
+ +                     Py_ssize_t expr_end, struct compiling *c, const node *n)
+ +
+ +{
+ +    PyCompilerFlags cf;
+ +    mod_ty mod;
+ +    char *utf_expr;
+ +    Py_ssize_t i;
+ +    Py_UCS4 end_ch = -1;
+ +    int all_whitespace;
+ +    PyObject *sub = NULL;
+ +
+ +    /* We only decref sub if we allocated it with a PyUnicode_Substring.
+ +       decref_sub records that. */
+ +    int decref_sub = 0;
+ +
+ +    assert(str);
+ +
+ +    assert(expr_start >= 0 && expr_start < PyUnicode_GET_LENGTH(str));
+ +    assert(expr_end >= 0 && expr_end < PyUnicode_GET_LENGTH(str));
+ +    assert(expr_end >= expr_start);
+ +
+ +    /* There has to be at least one character on each side of the
+ +       expression inside this str. This will have been caught before
+ +       we're called. */
+ +    assert(expr_start >= 1);
+ +    assert(expr_end <= PyUnicode_GET_LENGTH(str)-1);
+ +
+ +    /* If the substring is all whitespace, it's an error. We need to
+ +        catch this here, and not when we call PyParser_ASTFromString,
+ +        because turning the expression '' into '()' would go from
+ +        being invalid to valid. */
+ +    /* Note that this code says an empty string is all
+ +        whitespace. That's important. There's a test for it: f'{}'. */
+ +    all_whitespace = 1;
+ +    for (i = expr_start; i < expr_end; i++) {
+ +        if (!Py_UNICODE_ISSPACE(PyUnicode_READ_CHAR(str, i))) {
+ +            all_whitespace = 0;
+ +            break;
+ +        }
+ +    }
+ +    if (all_whitespace) {
+ +        ast_error(c, n, "f-string: empty expression not allowed");
+ +        goto error;
+ +    }
+ +
+ +    /* If the substring will be the entire source string, we can't use
+ +        PyUnicode_Substring, since it will return another reference to
+ +        our original string. Because we're modifying the string in
+ +        place, that's a no-no. So, detect that case and just use our
+ +        string directly. */
+ +
+ +    if (expr_start-1 == 0 && expr_end+1 == PyUnicode_GET_LENGTH(str)) {
+ +        /* If str is well formed, then the first and last chars must
+ +           be '{' and '}', respectively. But, if there's a syntax
+ +           error, for example f'{3!', then the last char won't be a
+ +           closing brace. So, remember the last character we read in
+ +           order for us to restore it. */
+ +        end_ch = PyUnicode_ReadChar(str, expr_end-expr_start+1);
+ +        assert(end_ch != (Py_UCS4)-1);
+ +
+ +        /* In all cases, however, start_ch must be '{'. */
+ +        assert(PyUnicode_ReadChar(str, 0) == '{');
+ +
+ +        sub = str;
+ +    } else {
+ +        /* Create a substring object. It must be a new object, with
+ +           refcount==1, so that we can modify it. */
+ +        sub = PyUnicode_Substring(str, expr_start-1, expr_end+1);
+ +        if (!sub)
+ +            goto error;
+ +        assert(sub != str);  /* Make sure it's a new string. */
+ +        decref_sub = 1;      /* Remember to deallocate it on error. */
+ +    }
+ +
+ +    /* Put () around the expression. */
+ +    if (PyUnicode_WriteChar(sub, 0, '(') < 0 ||
+ +        PyUnicode_WriteChar(sub, expr_end-expr_start+1, ')') < 0)
+ +        goto error;
+ +
+ +    /* No need to free the memory returned here: it's managed by the
+ +       string. */
+ +    utf_expr = PyUnicode_AsUTF8(sub);
+ +    if (!utf_expr)
+ +        goto error;
+ +
+ +    cf.cf_flags = PyCF_ONLY_AST;
+ +    mod = PyParser_ASTFromString(utf_expr, "<fstring>",
+ +                                 Py_eval_input, &cf, c->c_arena);
+ +    if (!mod)
+ +        goto error;
+ +
+ +    if (sub != str)
+ +        /* Clear instead of decref in case we ever modify this code to change
+ +           the error handling: this is safest because the XDECREF won't try
+ +           and decref it when it's NULL. */
+ +        /* No need to restore the chars in sub, since we know it's getting
+ +           ready to get deleted (refcount must be 1, since we got a new string
+ +           in PyUnicode_Substring). */
+ +        Py_CLEAR(sub);
+ +    else {
+ +        assert(!decref_sub);
+ +        assert(end_ch != (Py_UCS4)-1);
+ +        /* Restore str, which we earlier modified directly. */
+ +        if (PyUnicode_WriteChar(str, 0, '{') < 0 ||
+ +            PyUnicode_WriteChar(str, expr_end-expr_start+1, end_ch) < 0)
+ +            goto error;
+ +    }
+ +    return mod->v.Expression.body;
+ +
+ +error:
+ +    /* Only decref sub if it was the result of PyUnicode_Substring. */
+ +    if (decref_sub)
+ +        Py_XDECREF(sub);
+ +
+ +    if (end_ch != (Py_UCS4)-1) {
+ +        /* We only get here if we modified str. Make sure that's the
+ +           case: str will be equal to sub. */
+ +        if (str == sub) {
+ +            /* Don't check the error, because we've already set the
+ +               error state (that's why we're in 'error', after
+ +               all). */
+ +            PyUnicode_WriteChar(str, 0, '{');
+ +            PyUnicode_WriteChar(str, expr_end-expr_start+1, end_ch);
+ +        }
+ +    }
+ +    return NULL;
+ +}
+ +
+ +/* Return -1 on error.
+ +
+ +   Return 0 if we reached the end of the literal.
+ +
+ +   Return 1 if we haven't reached the end of the literal, but we want
+ +   the caller to process the literal up to this point. Used for
+ +   doubled braces.
+ +*/
+ +static int
+ +fstring_find_literal(PyObject *str, Py_ssize_t *ofs, PyObject **literal,
+ +                     int recurse_lvl, struct compiling *c, const node *n)
+ +{
+ +    /* Get any literal string. It ends when we hit an un-doubled brace, or the
+ +       end of the string. */
+ +
+ +    Py_ssize_t literal_start, literal_end;
+ +    int result = 0;
+ +
+ +    enum PyUnicode_Kind kind = PyUnicode_KIND(str);
+ +    void *data = PyUnicode_DATA(str);
+ +
+ +    assert(*literal == NULL);
+ +
+ +    literal_start = *ofs;
+ +    for (; *ofs < PyUnicode_GET_LENGTH(str); *ofs += 1) {
+ +        Py_UCS4 ch = PyUnicode_READ(kind, data, *ofs);
+ +        if (ch == '{' || ch == '}') {
+ +            /* Check for doubled braces, but only at the top level. If
+ +               we checked at every level, then f'{0:{3}}' would fail
+ +               with the two closing braces. */
+ +            if (recurse_lvl == 0) {
+ +                if (*ofs + 1 < PyUnicode_GET_LENGTH(str) &&
+ +                    PyUnicode_READ(kind, data, *ofs + 1) == ch) {
+ +                    /* We're going to tell the caller that the literal ends
+ +                       here, but that they should continue scanning. But also
+ +                       skip over the second brace when we resume scanning. */
+ +                    literal_end = *ofs + 1;
+ +                    *ofs += 2;
+ +                    result = 1;
+ +                    goto done;
+ +                }
+ +
+ +                /* Where a single '{' is the start of a new expression, a
+ +                   single '}' is not allowed. */
+ +                if (ch == '}') {
+ +                    ast_error(c, n, "f-string: single '}' is not allowed");
+ +                    return -1;
+ +                }
+ +            }
+ +
+ +            /* We're either at a '{', which means we're starting another
+ +               expression; or a '}', which means we're at the end of this
+ +               f-string (for a nested format_spec). */
+ +            break;
+ +        }
+ +    }
+ +    literal_end = *ofs;
+ +
+ +    assert(*ofs == PyUnicode_GET_LENGTH(str) ||
+ +           PyUnicode_READ(kind, data, *ofs) == '{' ||
+ +           PyUnicode_READ(kind, data, *ofs) == '}');
+ +done:
+ +    if (literal_start != literal_end) {
+ +        *literal = PyUnicode_Substring(str, literal_start, literal_end);
+ +        if (!*literal)
+ +            return -1;
+ +    }
+ +
+ +    return result;
+ +}
+ +
+ +/* Forward declaration because parsing is recursive. */
+ +static expr_ty
+ +fstring_parse(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
+ +              struct compiling *c, const node *n);
+ +
+ +/* Parse the f-string str, starting at ofs. We know *ofs starts an
+ +   expression (so it must be a '{'). Stores in *expression the
+ +   FormattedValue node, which includes the expression, conversion
+ +   character, and format_spec expression. Returns 0, or -1 on error.
+ +
+ +   Note that I don't do a perfect job here: I don't make sure that a
+ +   closing brace doesn't match an opening paren, for example. It
+ +   doesn't need to error on all invalid expressions, just correctly
+ +   find the end of all valid ones. Any errors inside the expression
+ +   will be caught when we parse it later. */
+ +static int
+ +fstring_find_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
+ +                  expr_ty *expression, struct compiling *c, const node *n)
+ +{
+ +    /* Return -1 on error, else 0. */
+ +
+ +    Py_ssize_t expr_start;
+ +    Py_ssize_t expr_end;
+ +    expr_ty simple_expression;
+ +    expr_ty format_spec = NULL; /* Optional format specifier. */
+ +    Py_UCS4 conversion = -1; /* The conversion char. -1 if not specified. */
+ +
+ +    enum PyUnicode_Kind kind = PyUnicode_KIND(str);
+ +    void *data = PyUnicode_DATA(str);
+ +
+ +    /* 0 if we're not in a string, else the quote char we're trying to
+ +       match (single or double quote). */
+ +    Py_UCS4 quote_char = 0;
+ +
+ +    /* If we're inside a string, 1=normal, 3=triple-quoted. */
+ +    int string_type = 0;
+ +
+ +    /* Keep track of nesting level for braces/parens/brackets in
+ +       expressions. */
+ +    Py_ssize_t nested_depth = 0;
+ +
+ +    /* Can only nest one level deep. */
+ +    if (recurse_lvl >= 2) {
+ +        ast_error(c, n, "f-string: expressions nested too deeply");
+ +        return -1;
+ +    }
+ +
+ +    /* The first char must be a left brace, or we wouldn't have gotten
+ +       here. Skip over it. */
+ +    assert(PyUnicode_READ(kind, data, *ofs) == '{');
+ +    *ofs += 1;
+ +
+ +    expr_start = *ofs;
+ +    for (; *ofs < PyUnicode_GET_LENGTH(str); *ofs += 1) {
+ +        Py_UCS4 ch;
+ +
+ +        /* Loop invariants. */
+ +        assert(nested_depth >= 0);
+ +        assert(*ofs >= expr_start);
+ +        if (quote_char)
+ +            assert(string_type == 1 || string_type == 3);
+ +        else
+ +            assert(string_type == 0);
+ +
+ +        ch = PyUnicode_READ(kind, data, *ofs);
+ +        if (quote_char) {
+ +            /* We're inside a string. See if we're at the end. */
+ +            /* This code needs to implement the same non-error logic
+ +               as tok_get from tokenizer.c, at the letter_quote
+ +               label. To actually share that code would be a
+ +               nightmare. But, it's unlikely to change and is small,
+ +               so duplicate it here. Note we don't need to catch all
+ +               of the errors, since they'll be caught when parsing the
+ +               expression. We just need to match the non-error
+ +               cases. Thus we can ignore \n in single-quoted strings,
+ +               for example. Or non-terminated strings. */
+ +            if (ch == quote_char) {
+ +                /* Does this match the string_type (single or triple
+ +                   quoted)? */
+ +                if (string_type == 3) {
+ +                    if (*ofs+2 < PyUnicode_GET_LENGTH(str) &&
+ +                        PyUnicode_READ(kind, data, *ofs+1) == ch &&
+ +                        PyUnicode_READ(kind, data, *ofs+2) == ch) {
+ +                        /* We're at the end of a triple quoted string. */
+ +                        *ofs += 2;
+ +                        string_type = 0;
+ +                        quote_char = 0;
+ +                        continue;
+ +                    }
+ +                } else {
+ +                    /* We're at the end of a normal string. */
+ +                    quote_char = 0;
+ +                    string_type = 0;
+ +                    continue;
+ +                }
+ +            }
+ +            /* We're inside a string, and not finished with the
+ +               string. If this is a backslash, skip the next char (it
+ +               might be an end quote that needs skipping). Otherwise,
+ +               just consume this character normally. */
+ +            if (ch == '\\' && *ofs+1 < PyUnicode_GET_LENGTH(str)) {
+ +                /* Just skip the next char, whatever it is. */
+ +                *ofs += 1;
+ +            }
+ +        } else if (ch == '\'' || ch == '"') {
+ +            /* Is this a triple quoted string? */
+ +            if (*ofs+2 < PyUnicode_GET_LENGTH(str) &&
+ +                PyUnicode_READ(kind, data, *ofs+1) == ch &&
+ +                PyUnicode_READ(kind, data, *ofs+2) == ch) {
+ +                string_type = 3;
+ +                *ofs += 2;
+ +            } else {
+ +                /* Start of a normal string. */
+ +                string_type = 1;
+ +            }
+ +            /* Start looking for the end of the string. */
+ +            quote_char = ch;
+ +        } else if (ch == '[' || ch == '{' || ch == '(') {
+ +            nested_depth++;
+ +        } else if (nested_depth != 0 &&
+ +                   (ch == ']' || ch == '}' || ch == ')')) {
+ +            nested_depth--;
+ +        } else if (ch == '#') {
+ +            /* Error: can't include a comment character, inside parens
+ +               or not. */
+ +            ast_error(c, n, "f-string cannot include '#'");
+ +            return -1;
+ +        } else if (nested_depth == 0 &&
+ +                   (ch == '!' || ch == ':' || ch == '}')) {
+ +            /* First, test for the special case of "!=". Since '=' is
+ +               not an allowed conversion character, nothing is lost in
+ +               this test. */
+ +            if (ch == '!' && *ofs+1 < PyUnicode_GET_LENGTH(str) &&
+ +                  PyUnicode_READ(kind, data, *ofs+1) == '=')
+ +                /* This isn't a conversion character, just continue. */
+ +                continue;
+ +
+ +            /* Normal way out of this loop. */
+ +            break;
+ +        } else {
+ +            /* Just consume this char and loop around. */
+ +        }
+ +    }
+ +    expr_end = *ofs;
+ +    /* If we leave this loop in a string or with mismatched parens, we
+ +       don't care. We'll get a syntax error when compiling the
+ +       expression. But, we can produce a better error message, so
+ +       let's just do that.*/
+ +    if (quote_char) {
+ +        ast_error(c, n, "f-string: unterminated string");
+ +        return -1;
+ +    }
+ +    if (nested_depth) {
+ +        ast_error(c, n, "f-string: mismatched '(', '{', or '['");
+ +        return -1;
+ +    }
+ +
+ +    if (*ofs >= PyUnicode_GET_LENGTH(str))
+ +        goto unexpected_end_of_string;
+ +
+ +    /* Compile the expression as soon as possible, so we show errors
+ +       related to the expression before errors related to the
+ +       conversion or format_spec. */
+ +    simple_expression = fstring_compile_expr(str, expr_start, expr_end, c, n);
+ +    if (!simple_expression)
+ +        return -1;
+ +
+ +    /* Check for a conversion char, if present. */
+ +    if (PyUnicode_READ(kind, data, *ofs) == '!') {
+ +        *ofs += 1;
+ +        if (*ofs >= PyUnicode_GET_LENGTH(str))
+ +            goto unexpected_end_of_string;
+ +
+ +        conversion = PyUnicode_READ(kind, data, *ofs);
+ +        *ofs += 1;
+ +
+ +        /* Validate the conversion. */
+ +        if (!(conversion == 's' || conversion == 'r'
+ +              || conversion == 'a')) {
+ +            ast_error(c, n, "f-string: invalid conversion character: "
+ +                            "expected 's', 'r', or 'a'");
+ +            return -1;
+ +        }
+ +    }
+ +
+ +    /* Check for the format spec, if present. */
+ +    if (*ofs >= PyUnicode_GET_LENGTH(str))
+ +        goto unexpected_end_of_string;
+ +    if (PyUnicode_READ(kind, data, *ofs) == ':') {
+ +        *ofs += 1;
+ +        if (*ofs >= PyUnicode_GET_LENGTH(str))
+ +            goto unexpected_end_of_string;
+ +
+ +        /* Parse the format spec. */
+ +        format_spec = fstring_parse(str, ofs, recurse_lvl+1, c, n);
+ +        if (!format_spec)
+ +            return -1;
+ +    }
+ +
+ +    if (*ofs >= PyUnicode_GET_LENGTH(str) ||
+ +          PyUnicode_READ(kind, data, *ofs) != '}')
+ +        goto unexpected_end_of_string;
+ +
+ +    /* We're at a right brace. Consume it. */
+ +    assert(*ofs < PyUnicode_GET_LENGTH(str));
+ +    assert(PyUnicode_READ(kind, data, *ofs) == '}');
+ +    *ofs += 1;
+ +
+ +    /* And now create the FormattedValue node that represents this entire
+ +       expression with the conversion and format spec. */
+ +    *expression = FormattedValue(simple_expression, (int)conversion,
+ +                                 format_spec, LINENO(n), n->n_col_offset,
+ +                                 c->c_arena);
+ +    if (!*expression)
+ +        return -1;
+ +
+ +    return 0;
+ +
+ +unexpected_end_of_string:
+ +    ast_error(c, n, "f-string: expecting '}'");
+ +    return -1;
+ +}
+ +
+ +/* Return -1 on error.
+ +
+ +   Return 0 if we have a literal (possibly zero length) and an
+ +   expression (zero length if at the end of the string).
+ +
+ +   Return 1 if we have a literal, but no expression, and we want the
+ +   caller to call us again. This is used to deal with doubled
+ +   braces.
+ +
+ +   When called multiple times on the string 'a{{b{0}c', this function
+ +   will return:
+ +
+ +   1. the literal 'a{' with no expression, and a return value
+ +      of 1. Despite the fact that there's no expression, the return
+ +      value of 1 means we're not finished yet.
+ +
+ +   2. the literal 'b' and the expression '0', with a return value of
+ +      0. The fact that there's an expression means we're not finished.
+ +
+ +   3. literal 'c' with no expression and a return value of 0. The
+ +      combination of the return value of 0 with no expression means
+ +      we're finished.
+ +*/
+ +static int
+ +fstring_find_literal_and_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
+ +                              PyObject **literal, expr_ty *expression,
+ +                              struct compiling *c, const node *n)
+ +{
+ +    int result;
+ +
+ +    assert(*literal == NULL && *expression == NULL);
+ +
+ +    /* Get any literal string. */
+ +    result = fstring_find_literal(str, ofs, literal, recurse_lvl, c, n);
+ +    if (result < 0)
+ +        goto error;
+ +
+ +    assert(result == 0 || result == 1);
+ +
+ +    if (result == 1)
+ +        /* We have a literal, but don't look at the expression. */
+ +        return 1;
+ +
+ +    assert(*ofs <= PyUnicode_GET_LENGTH(str));
+ +
+ +    if (*ofs >= PyUnicode_GET_LENGTH(str) ||
+ +        PyUnicode_READ_CHAR(str, *ofs) == '}')
+ +        /* We're at the end of the string or the end of a nested
+ +           f-string: no expression. The top-level error case where we
+ +           expect to be at the end of the string but we're at a '}' is
+ +           handled later. */
+ +        return 0;
+ +
+ +    /* We must now be the start of an expression, on a '{'. */
+ +    assert(*ofs < PyUnicode_GET_LENGTH(str) &&
+ +           PyUnicode_READ_CHAR(str, *ofs) == '{');
+ +
+ +    if (fstring_find_expr(str, ofs, recurse_lvl, expression, c, n) < 0)
+ +        goto error;
+ +
+ +    return 0;
+ +
+ +error:
-     Py_XDECREF(*literal);
-     *literal = NULL;
++    Py_CLEAR(*literal);
+ +    return -1;
+ +}
+ +
+ +#define EXPRLIST_N_CACHED  64
+ +
+ +typedef struct {
+ +    /* Incrementally build an array of expr_ty, to be used in an
+ +       asdl_seq. Cache some small but reasonably sized number of
+ +       expr_ty's, and then after that start dynamically allocating,
+ +       doubling the number allocated each time. Note that the f-string
+ +       f'{0}a{1}' contains 3 expr_ty's: 2 FormattedValue's, and one
+ +       Str for the literal 'a'. So you add expr_ty's about twice as
+ +       fast as you add expressions in an f-string. */
+ +
+ +    Py_ssize_t allocated;  /* Number we've allocated. */
+ +    Py_ssize_t size;       /* Number we've used. */
+ +    expr_ty    *p;         /* Pointer to the memory we're actually
+ +                              using. Will point to 'data' until we
+ +                              start dynamically allocating. */
+ +    expr_ty    data[EXPRLIST_N_CACHED];
+ +} ExprList;
+ +
+ +#ifdef NDEBUG
+ +#define ExprList_check_invariants(l)
+ +#else
+ +static void
+ +ExprList_check_invariants(ExprList *l)
+ +{
+ +    /* Check our invariants. Make sure this object is "live", and
+ +       hasn't been deallocated. */
+ +    assert(l->size >= 0);
+ +    assert(l->p != NULL);
+ +    if (l->size <= EXPRLIST_N_CACHED)
+ +        assert(l->data == l->p);
+ +}
+ +#endif
+ +
+ +static void
+ +ExprList_Init(ExprList *l)
+ +{
+ +    l->allocated = EXPRLIST_N_CACHED;
+ +    l->size = 0;
+ +
+ +    /* Until we start allocating dynamically, p points to data. */
+ +    l->p = l->data;
+ +
+ +    ExprList_check_invariants(l);
+ +}
+ +
+ +static int
+ +ExprList_Append(ExprList *l, expr_ty exp)
+ +{
+ +    ExprList_check_invariants(l);
+ +    if (l->size >= l->allocated) {
+ +        /* We need to alloc (or realloc) the memory. */
+ +        Py_ssize_t new_size = l->allocated * 2;
+ +
+ +        /* See if we've ever allocated anything dynamically. */
+ +        if (l->p == l->data) {
+ +            Py_ssize_t i;
+ +            /* We're still using the cached data. Switch to
+ +               alloc-ing. */
+ +            l->p = PyMem_RawMalloc(sizeof(expr_ty) * new_size);
+ +            if (!l->p)
+ +                return -1;
+ +            /* Copy the cached data into the new buffer. */
+ +            for (i = 0; i < l->size; i++)
+ +                l->p[i] = l->data[i];
+ +        } else {
+ +            /* Just realloc. */
+ +            expr_ty *tmp = PyMem_RawRealloc(l->p, sizeof(expr_ty) * new_size);
+ +            if (!tmp) {
+ +                PyMem_RawFree(l->p);
+ +                l->p = NULL;
+ +                return -1;
+ +            }
+ +            l->p = tmp;
+ +        }
+ +
+ +        l->allocated = new_size;
+ +        assert(l->allocated == 2 * l->size);
+ +    }
+ +
+ +    l->p[l->size++] = exp;
+ +
+ +    ExprList_check_invariants(l);
+ +    return 0;
+ +}
+ +
+ +static void
+ +ExprList_Dealloc(ExprList *l)
+ +{
+ +    ExprList_check_invariants(l);
+ +
+ +    /* If there's been an error, or we've never dynamically allocated,
+ +       do nothing. */
+ +    if (!l->p || l->p == l->data) {
+ +        /* Do nothing. */
+ +    } else {
+ +        /* We have dynamically allocated. Free the memory. */
+ +        PyMem_RawFree(l->p);
+ +    }
+ +    l->p = NULL;
+ +    l->size = -1;
+ +}
+ +
+ +static asdl_seq *
+ +ExprList_Finish(ExprList *l, PyArena *arena)
+ +{
+ +    asdl_seq *seq;
+ +
+ +    ExprList_check_invariants(l);
+ +
+ +    /* Allocate the asdl_seq and copy the expressions into it. */
+ +    seq = _Py_asdl_seq_new(l->size, arena);
+ +    if (seq) {
+ +        Py_ssize_t i;
+ +        for (i = 0; i < l->size; i++)
+ +            asdl_seq_SET(seq, i, l->p[i]);
+ +    }
+ +    ExprList_Dealloc(l);
+ +    return seq;
+ +}
+ +
+ +/* The FstringParser is designed to add a mix of strings and
+ +   f-strings, and concat them together as needed. Ultimately, it
+ +   generates an expr_ty. */
+ +typedef struct {
+ +    PyObject *last_str;
+ +    ExprList expr_list;
+ +} FstringParser;
+ +
+ +#ifdef NDEBUG
+ +#define FstringParser_check_invariants(state)
+ +#else
+ +static void
+ +FstringParser_check_invariants(FstringParser *state)
+ +{
+ +    if (state->last_str)
+ +        assert(PyUnicode_CheckExact(state->last_str));
+ +    ExprList_check_invariants(&state->expr_list);
+ +}
+ +#endif
+ +
+ +static void
+ +FstringParser_Init(FstringParser *state)
+ +{
+ +    state->last_str = NULL;
+ +    ExprList_Init(&state->expr_list);
+ +    FstringParser_check_invariants(state);
+ +}
+ +
+ +static void
+ +FstringParser_Dealloc(FstringParser *state)
+ +{
+ +    FstringParser_check_invariants(state);
+ +
+ +    Py_XDECREF(state->last_str);
+ +    ExprList_Dealloc(&state->expr_list);
+ +}
+ +
+ +/* Make a Str node, but decref the PyUnicode object being added. */
+ +static expr_ty
+ +make_str_node_and_del(PyObject **str, struct compiling *c, const node* n)
+ +{
+ +    PyObject *s = *str;
+ +    *str = NULL;
+ +    assert(PyUnicode_CheckExact(s));
+ +    if (PyArena_AddPyObject(c->c_arena, s) < 0) {
+ +        Py_DECREF(s);
+ +        return NULL;
+ +    }
+ +    return Str(s, LINENO(n), n->n_col_offset, c->c_arena);
+ +}
+ +
+ +/* Add a non-f-string (that is, a regular literal string) to the pending
+ +   literal text kept in state->last_str. The reference to str is always
+ +   consumed: it is dropped if str is empty, stored if there was no pending
+ +   text, or released as part of the concatenation. Returns 0 on success
+ +   and -1 if the concatenation failed, in which case state->last_str is
+ +   NULL. */
+ +static int
+ +FstringParser_ConcatAndDel(FstringParser *state, PyObject *str)
+ +{
+ +    FstringParser_check_invariants(state);
+ +
+ +    assert(PyUnicode_CheckExact(str));
+ +
+ +    if (PyUnicode_GET_LENGTH(str) == 0) {   /* nothing to add: just drop the reference */
+ +        Py_DECREF(str);
+ +        return 0;
+ +    }
+ +
+ +    if (!state->last_str) {
+ +        /* We didn't have a string before, so just remember this one. */
+ +        state->last_str = str;
+ +    } else {
+ +        /* Concatenate this with the previous string. */
-         PyObject *temp = PyUnicode_Concat(state->last_str, str);
-         Py_DECREF(state->last_str);
-         Py_DECREF(str);
-         state->last_str = temp;
-         if (!temp)
++        PyUnicode_AppendAndDel(&state->last_str, str);
++        if (!state->last_str)
+ +            return -1;
+ +    }
+ +    FstringParser_check_invariants(state);
+ +    return 0;
+ +}
+ +
+ +/* Parse an f-string. The f-string is in str, starting at ofs, with no 'f'
+ +   or quotes. str is not decref'd, since we don't know if it's used elsewhere.
+ +   And if we're only looking at a part of a string, then decref'ing is
+ +   definitely not the right thing to do!
+ +
+ +   Literal text is accumulated in state->last_str. Whenever an expression
+ +   is found, any pending literal is first flushed to state->expr_list as a
+ +   Str node and the expression is appended after it. ofs is handed on to
+ +   fstring_find_literal_and_expr() by pointer. With recurse_lvl == 0 the
+ +   whole of str must have been consumed when the loop ends; otherwise the
+ +   character at *ofs must be '}'. Returns 0 on success, -1 on error. */
+ +static int
+ +FstringParser_ConcatFstring(FstringParser *state, PyObject *str,
+ +                            Py_ssize_t *ofs, int recurse_lvl,
+ +                            struct compiling *c, const node *n)
+ +{
+ +    FstringParser_check_invariants(state);
+ +
+ +    /* Parse the f-string. */
+ +    while (1) {
+ +        PyObject *literal = NULL;
+ +        expr_ty expression = NULL;
+ +
+ +        /* If there's a zero length literal in front of the
+ +           expression, literal will be NULL. If we're at the end of
+ +           the f-string, expression will be NULL (unless result == 1,
+ +           see below). */
+ +        int result = fstring_find_literal_and_expr(str, ofs, recurse_lvl,
+ +                                                   &literal, &expression,
+ +                                                   c, n);
+ +        if (result < 0)   /* NOTE(review): presumably nothing is left in
+ +                             literal to release on this path -- confirm
+ +                             against the callee */
+ +            return -1;
+ +
+ +        /* Add the literal, if any. */
+ +        if (!literal) {
+ +            /* Do nothing. Just leave last_str alone (and possibly
+ +               NULL). */
+ +        } else if (!state->last_str) {
+ +            state->last_str = literal;   /* first pending text: take over the reference */
+ +            literal = NULL;
+ +        } else {
+ +            /* We have a literal, concatenate it. */
+ +            assert(PyUnicode_GET_LENGTH(literal) != 0);
+ +            if (FstringParser_ConcatAndDel(state, literal) < 0)
+ +                return -1;
+ +            literal = NULL;
+ +        }
+ +        assert(!state->last_str ||
+ +               PyUnicode_GET_LENGTH(state->last_str) != 0);
+ +
+ +        /* We've dealt with the literal now. It can't be leaked on further
+ +           errors. */
+ +        assert(literal == NULL);
+ +
+ +        /* See if we should just loop around to get the next literal
+ +           and expression, while ignoring the expression this
+ +           time. This is used for un-doubling braces, as an
+ +           optimization. */
+ +        if (result == 1)
+ +            continue;
+ +
+ +        if (!expression)
+ +            /* We're done with this f-string. */
+ +            break;
+ +
+ +        /* We know we have an expression. Convert any existing string
+ +           to a Str node. */
+ +        if (!state->last_str) {
+ +            /* Do nothing. No previous literal. */
+ +        } else {
+ +            /* Convert the existing last_str literal to a Str node.
+ +               NOTE(review): the local declared below shadows the str
+ +               parameter for the remainder of this block. */
+ +            expr_ty str = make_str_node_and_del(&state->last_str, c, n);
+ +            if (!str || ExprList_Append(&state->expr_list, str) < 0)
+ +                return -1;
+ +        }
+ +
+ +        if (ExprList_Append(&state->expr_list, expression) < 0)
+ +            return -1;
+ +    }
+ +
+ +    assert(*ofs <= PyUnicode_GET_LENGTH(str));
+ +
+ +    /* If recurse_lvl is zero, then we must be at the end of the
+ +       string. Otherwise, we must be at a right brace. */
+ +
+ +    if (recurse_lvl == 0 && *ofs < PyUnicode_GET_LENGTH(str)) {
+ +        ast_error(c, n, "f-string: unexpected end of string");
+ +        return -1;
+ +    }
+ +    if (recurse_lvl != 0 && PyUnicode_READ_CHAR(str, *ofs) != '}') {
+ +        ast_error(c, n, "f-string: expecting '}'");
+ +        return -1;
+ +    }
+ +
+ +    FstringParser_check_invariants(state);
+ +    return 0;
+ +}
+ +
+ +/* Convert the partial state reflected in last_str and expr_list to an
+ +   expr_ty. The expr_ty can be a Str, or a JoinedStr; when the finished
+ +   list holds exactly one node, that node is returned as-is. Returns NULL
+ +   on error. On the paths that jump to the error label, state is released
+ +   with FstringParser_Dealloc(). */
+ +static expr_ty
+ +FstringParser_Finish(FstringParser *state, struct compiling *c,
+ +                     const node *n)
+ +{
+ +    asdl_seq *seq;
+ +
+ +    FstringParser_check_invariants(state);
+ +
+ +    /* If we're just a constant string with no expressions, return
+ +       that. Note that a NULL result from make_str_node_and_del() is
+ +       returned directly here, without going through the error label. */
+ +    if(state->expr_list.size == 0) {
+ +        if (!state->last_str) {
+ +            /* Create a zero length string. */
+ +            state->last_str = PyUnicode_FromStringAndSize(NULL, 0);
+ +            if (!state->last_str)
+ +                goto error;
+ +        }
+ +        return make_str_node_and_del(&state->last_str, c, n);
+ +    }
+ +
+ +    /* Create a Str node out of last_str, if needed. It will be the
+ +       last node in our expression list. */
+ +    if (state->last_str) {
+ +        expr_ty str = make_str_node_and_del(&state->last_str, c, n);
+ +        if (!str || ExprList_Append(&state->expr_list, str) < 0)
+ +            goto error;
+ +    }
+ +    /* last_str was either NULL to begin with, or has just been handed
+ +       off and reset to NULL by make_str_node_and_del() above. */
+ +    assert(state->last_str == NULL);
+ +
+ +    seq = ExprList_Finish(&state->expr_list, c->c_arena);
+ +    if (!seq)
+ +        goto error;
+ +
+ +    /* If there's only one expression, return it. Otherwise, we need
+ +       to join them together. */
+ +    if (seq->size == 1)
+ +        return seq->elements[0];
+ +
+ +    return JoinedStr(seq, LINENO(n), n->n_col_offset, c->c_arena);
+ +
+ +error:
+ +    FstringParser_Dealloc(state);
+ +    return NULL;
+ +}
+ +
+ +/* Given an f-string (with no 'f' or quotes) that's in str starting at
+ +   ofs, parse it into an expr_ty. Return NULL on error. Does not
+ +   decref str.
+ +
+ +   Works on a local FstringParser: it is released here if
+ +   FstringParser_ConcatFstring() fails; otherwise the result comes from
+ +   FstringParser_Finish(). */
+ +static expr_ty
+ +fstring_parse(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
+ +              struct compiling *c, const node *n)
+ +{
+ +    FstringParser state;
+ +
+ +    FstringParser_Init(&state);
+ +    if (FstringParser_ConcatFstring(&state, str, ofs, recurse_lvl,
+ +                                    c, n) < 0) {
+ +        FstringParser_Dealloc(&state);
+ +        return NULL;
+ +    }
+ +
+ +    return FstringParser_Finish(&state, c, n);
+ +}
+ +
+ +/* n is a Python string literal, including the bracketing quote
+ +   characters, and r, b, u, and/or f prefixes (if any), and embedded
+ +   escape sequences (if any). parsestr parses it, and returns the
+ +   decoded Python string object.  If the string is an f-string, set
+ +   *fmode and return the unparsed string object.
+ +*/
   static PyObject *
- -parsestr(struct compiling *c, const node *n, int *bytesmode)
+ +parsestr(struct compiling *c, const node *n, int *bytesmode, int *fmode)
   {
       size_t len;
       const char *s = STR(n);
author	Serhiy Storchaka <storchaka@gmail.com>
	Sun, 27 Dec 2015 13:44:33 +0000 (15:44 +0200)
committer	Serhiy Storchaka <storchaka@gmail.com>
	Sun, 27 Dec 2015 13:44:33 +0000 (15:44 +0200)
		1	2
Modules/_ctypes/_ctypes.c	patch \|	diff1 \|	diff2 \|	blob \| history
Modules/_elementtree.c	patch \|	diff1 \|	diff2 \|	blob \| history
Modules/_sqlite/cursor.c	patch \|	diff1 \|	diff2 \|	blob \| history
Modules/zlibmodule.c	patch \|	diff1 \|	diff2 \|	blob \| history
Objects/unicodeobject.c	patch \|	diff1 \|	diff2 \|	blob \| history
Python/ast.c	patch \|	diff1 \|	diff2 \|	blob \| history