From: Serhiy Storchaka Date: Sun, 27 Dec 2015 13:44:33 +0000 (+0200) Subject: Issue #20440: More use of Py_SETREF. X-Git-Tag: v3.6.0a1~860 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=726fc139a5f40d81a0013c856be1283da08de4a0;p=python Issue #20440: More use of Py_SETREF. This patch is manually crafted and contains changes that couldn't be handled automatically. --- 726fc139a5f40d81a0013c856be1283da08de4a0 diff --cc Modules/_elementtree.c index f69ce29766,11d1aece4f..f16d48f829 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@@ -1911,92 -1879,92 +1909,90 @@@ element_ass_subscr(PyObject* self_, PyO } static PyObject* -element_getattro(ElementObject* self, PyObject* nameobj) +element_tag_getter(ElementObject *self, void *closure) { - PyObject* res; - char *name = ""; + PyObject *res = self->tag; + Py_INCREF(res); + return res; +} - if (PyUnicode_Check(nameobj)) - name = _PyUnicode_AsString(nameobj); +static PyObject* +element_text_getter(ElementObject *self, void *closure) +{ + PyObject *res = element_get_text(self); + Py_XINCREF(res); + return res; +} - if (name == NULL) - return NULL; +static PyObject* +element_tail_getter(ElementObject *self, void *closure) +{ + PyObject *res = element_get_tail(self); + Py_XINCREF(res); + return res; +} - /* handle common attributes first */ - if (strcmp(name, "tag") == 0) { - res = self->tag; - Py_INCREF(res); - return res; - } else if (strcmp(name, "text") == 0) { - res = element_get_text(self); - Py_XINCREF(res); - return res; +static PyObject* +element_attrib_getter(ElementObject *self, void *closure) +{ + PyObject *res; + if (!self->extra) { + if (create_extra(self, NULL) < 0) + return NULL; } + res = element_get_attrib(self); + Py_XINCREF(res); + return res; +} - /* methods */ - res = PyObject_GenericGetAttr((PyObject*) self, nameobj); - if (res) - return res; - - /* less common attributes */ - if (strcmp(name, "tail") == 0) { - PyErr_Clear(); - res = element_get_tail(self); - } else if (strcmp(name, "attrib") == 0) { - PyErr_Clear(); - if (!self->extra) { - if (create_extra(self, NULL) < 0) - return NULL; - } - res = element_get_attrib(self); +/* macro for setter validation */ +#define _VALIDATE_ATTR_VALUE(V) \ + if ((V) == NULL) { \ + PyErr_SetString( \ + PyExc_AttributeError, \ + "can't delete element attribute"); \ + return -1; \ } - if (!res) - return NULL; - - Py_INCREF(res); - return res; +static int +element_tag_setter(ElementObject *self, PyObject *value, void *closure) +{ + _VALIDATE_ATTR_VALUE(value); + Py_INCREF(value); - Py_DECREF(self->tag); - self->tag = value; ++ Py_SETREF(self->tag, value); + return 0; } static int -element_setattro(ElementObject* self, PyObject* nameobj, PyObject* value) +element_text_setter(ElementObject *self, PyObject *value, void *closure) { - char *name = ""; + _VALIDATE_ATTR_VALUE(value); + Py_INCREF(value); + Py_DECREF(JOIN_OBJ(self->text)); + self->text = value; + return 0; +} - if (value == NULL) { - PyErr_SetString(PyExc_AttributeError, - "can't delete attribute"); - return -1; - } - if (PyUnicode_Check(nameobj)) - name = _PyUnicode_AsString(nameobj); - if (name == NULL) - return -1; +static int +element_tail_setter(ElementObject *self, PyObject *value, void *closure) +{ + _VALIDATE_ATTR_VALUE(value); + Py_INCREF(value); + Py_DECREF(JOIN_OBJ(self->tail)); + self->tail = value; + return 0; +} - if (strcmp(name, "tag") == 0) { - Py_INCREF(value); - Py_SETREF(self->tag, value); - } else if (strcmp(name, "text") == 0) { - Py_DECREF(JOIN_OBJ(self->text)); - self->text = value; - Py_INCREF(self->text); - } else if (strcmp(name, "tail") == 0) { - Py_DECREF(JOIN_OBJ(self->tail)); - self->tail = value; - Py_INCREF(self->tail); - } else if (strcmp(name, "attrib") == 0) { - if (!self->extra) { - if (create_extra(self, NULL) < 0) - return -1; - } - Py_INCREF(value); - Py_SETREF(self->extra->attrib, value); - } else { - PyErr_SetString(PyExc_AttributeError, - "Can't set arbitrary attributes on Element"); - return -1; +static int +element_attrib_setter(ElementObject *self, PyObject *value, void *closure) +{ + _VALIDATE_ATTR_VALUE(value); + if (!self->extra) { + if (create_extra(self, NULL) < 0) + return -1; } - + Py_INCREF(value); - Py_DECREF(self->extra->attrib); - self->extra->attrib = value; ++ Py_SETREF(self->extra->attrib, value); return 0; } diff --cc Python/ast.c index 328ee5d914,7743c31c1f..33f2597fc0 --- a/Python/ast.c +++ b/Python/ast.c @@@ -4002,873 -3999,12 +4002,869 @@@ decode_unicode(struct compiling *c, con return v; } -/* s is a Python string literal, including the bracketing quote characters, - * and r &/or b prefixes (if any), and embedded escape sequences (if any). - * parsestr parses it, and returns the decoded Python string object. - */ +/* Compile this expression in to an expr_ty. We know that we can + temporarily modify the character before the start of this string + (it's '{'), and we know we can temporarily modify the character + after this string (it is a '}'). Leverage this to create a + sub-string with enough room for us to add parens around the + expression. This is to allow strings with embedded newlines, for + example. */ +static expr_ty +fstring_compile_expr(PyObject *str, Py_ssize_t expr_start, + Py_ssize_t expr_end, struct compiling *c, const node *n) + +{ + PyCompilerFlags cf; + mod_ty mod; + char *utf_expr; + Py_ssize_t i; + Py_UCS4 end_ch = -1; + int all_whitespace; + PyObject *sub = NULL; + + /* We only decref sub if we allocated it with a PyUnicode_Substring. + decref_sub records that. */ + int decref_sub = 0; + + assert(str); + + assert(expr_start >= 0 && expr_start < PyUnicode_GET_LENGTH(str)); + assert(expr_end >= 0 && expr_end < PyUnicode_GET_LENGTH(str)); + assert(expr_end >= expr_start); + + /* There has to be at least one character on each side of the + expression inside this str. This will have been caught before + we're called. */ + assert(expr_start >= 1); + assert(expr_end <= PyUnicode_GET_LENGTH(str)-1); + + /* If the substring is all whitespace, it's an error. We need to + catch this here, and not when we call PyParser_ASTFromString, + because turning the expression '' in to '()' would go from + being invalid to valid. */ + /* Note that this code says an empty string is all + whitespace. That's important. There's a test for it: f'{}'. */ + all_whitespace = 1; + for (i = expr_start; i < expr_end; i++) { + if (!Py_UNICODE_ISSPACE(PyUnicode_READ_CHAR(str, i))) { + all_whitespace = 0; + break; + } + } + if (all_whitespace) { + ast_error(c, n, "f-string: empty expression not allowed"); + goto error; + } + + /* If the substring will be the entire source string, we can't use + PyUnicode_Substring, since it will return another reference to + our original string. Because we're modifying the string in + place, that's a no-no. So, detect that case and just use our + string directly. */ + + if (expr_start-1 == 0 && expr_end+1 == PyUnicode_GET_LENGTH(str)) { + /* If str is well formed, then the first and last chars must + be '{' and '}', respectively. But, if there's a syntax + error, for example f'{3!', then the last char won't be a + closing brace. So, remember the last character we read in + order for us to restore it. */ + end_ch = PyUnicode_ReadChar(str, expr_end-expr_start+1); + assert(end_ch != (Py_UCS4)-1); + + /* In all cases, however, start_ch must be '{'. */ + assert(PyUnicode_ReadChar(str, 0) == '{'); + + sub = str; + } else { + /* Create a substring object. It must be a new object, with + refcount==1, so that we can modify it. */ + sub = PyUnicode_Substring(str, expr_start-1, expr_end+1); + if (!sub) + goto error; + assert(sub != str); /* Make sure it's a new string. */ + decref_sub = 1; /* Remember to deallocate it on error. */ + } + + /* Put () around the expression. */ + if (PyUnicode_WriteChar(sub, 0, '(') < 0 || + PyUnicode_WriteChar(sub, expr_end-expr_start+1, ')') < 0) + goto error; + + /* No need to free the memory returned here: it's managed by the + string. */ + utf_expr = PyUnicode_AsUTF8(sub); + if (!utf_expr) + goto error; + + cf.cf_flags = PyCF_ONLY_AST; + mod = PyParser_ASTFromString(utf_expr, "", + Py_eval_input, &cf, c->c_arena); + if (!mod) + goto error; + + if (sub != str) + /* Clear instead of decref in case we ever modify this code to change + the error handling: this is safest because the XDECREF won't try + and decref it when it's NULL. */ + /* No need to restore the chars in sub, since we know it's getting + ready to get deleted (refcount must be 1, since we got a new string + in PyUnicode_Substring). */ + Py_CLEAR(sub); + else { + assert(!decref_sub); + assert(end_ch != (Py_UCS4)-1); + /* Restore str, which we earlier modified directly. */ + if (PyUnicode_WriteChar(str, 0, '{') < 0 || + PyUnicode_WriteChar(str, expr_end-expr_start+1, end_ch) < 0) + goto error; + } + return mod->v.Expression.body; + +error: + /* Only decref sub if it was the result of a call to SubString. */ + if (decref_sub) + Py_XDECREF(sub); + + if (end_ch != (Py_UCS4)-1) { + /* We only get here if we modified str. Make sure that's the + case: str will be equal to sub. */ + if (str == sub) { + /* Don't check the error, because we've already set the + error state (that's why we're in 'error', after + all). */ + PyUnicode_WriteChar(str, 0, '{'); + PyUnicode_WriteChar(str, expr_end-expr_start+1, end_ch); + } + } + return NULL; +} + +/* Return -1 on error. + + Return 0 if we reached the end of the literal. + + Return 1 if we haven't reached the end of the literal, but we want + the caller to process the literal up to this point. Used for + doubled braces. +*/ +static int +fstring_find_literal(PyObject *str, Py_ssize_t *ofs, PyObject **literal, + int recurse_lvl, struct compiling *c, const node *n) +{ + /* Get any literal string. It ends when we hit an un-doubled brace, or the + end of the string. */ + + Py_ssize_t literal_start, literal_end; + int result = 0; + + enum PyUnicode_Kind kind = PyUnicode_KIND(str); + void *data = PyUnicode_DATA(str); + + assert(*literal == NULL); + + literal_start = *ofs; + for (; *ofs < PyUnicode_GET_LENGTH(str); *ofs += 1) { + Py_UCS4 ch = PyUnicode_READ(kind, data, *ofs); + if (ch == '{' || ch == '}') { + /* Check for doubled braces, but only at the top level. If + we checked at every level, then f'{0:{3}}' would fail + with the two closing braces. */ + if (recurse_lvl == 0) { + if (*ofs + 1 < PyUnicode_GET_LENGTH(str) && + PyUnicode_READ(kind, data, *ofs + 1) == ch) { + /* We're going to tell the caller that the literal ends + here, but that they should continue scanning. But also + skip over the second brace when we resume scanning. */ + literal_end = *ofs + 1; + *ofs += 2; + result = 1; + goto done; + } + + /* Where a single '{' is the start of a new expression, a + single '}' is not allowed. */ + if (ch == '}') { + ast_error(c, n, "f-string: single '}' is not allowed"); + return -1; + } + } + + /* We're either at a '{', which means we're starting another + expression; or a '}', which means we're at the end of this + f-string (for a nested format_spec). */ + break; + } + } + literal_end = *ofs; + + assert(*ofs == PyUnicode_GET_LENGTH(str) || + PyUnicode_READ(kind, data, *ofs) == '{' || + PyUnicode_READ(kind, data, *ofs) == '}'); +done: + if (literal_start != literal_end) { + *literal = PyUnicode_Substring(str, literal_start, literal_end); + if (!*literal) + return -1; + } + + return result; +} + +/* Forward declaration because parsing is recursive. */ +static expr_ty +fstring_parse(PyObject *str, Py_ssize_t *ofs, int recurse_lvl, + struct compiling *c, const node *n); + +/* Parse the f-string str, starting at ofs. We know *ofs starts an + expression (so it must be a '{'). Returns the FormattedValue node, + which includes the expression, conversion character, and + format_spec expression. + + Note that I don't do a perfect job here: I don't make sure that a + closing brace doesn't match an opening paren, for example. It + doesn't need to error on all invalid expressions, just correctly + find the end of all valid ones. Any errors inside the expression + will be caught when we parse it later. */ +static int +fstring_find_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl, + expr_ty *expression, struct compiling *c, const node *n) +{ + /* Return -1 on error, else 0. */ + + Py_ssize_t expr_start; + Py_ssize_t expr_end; + expr_ty simple_expression; + expr_ty format_spec = NULL; /* Optional format specifier. */ + Py_UCS4 conversion = -1; /* The conversion char. -1 if not specified. */ + + enum PyUnicode_Kind kind = PyUnicode_KIND(str); + void *data = PyUnicode_DATA(str); + + /* 0 if we're not in a string, else the quote char we're trying to + match (single or double quote). */ + Py_UCS4 quote_char = 0; + + /* If we're inside a string, 1=normal, 3=triple-quoted. */ + int string_type = 0; + + /* Keep track of nesting level for braces/parens/brackets in + expressions. */ + Py_ssize_t nested_depth = 0; + + /* Can only nest one level deep. */ + if (recurse_lvl >= 2) { + ast_error(c, n, "f-string: expressions nested too deeply"); + return -1; + } + + /* The first char must be a left brace, or we wouldn't have gotten + here. Skip over it. */ + assert(PyUnicode_READ(kind, data, *ofs) == '{'); + *ofs += 1; + + expr_start = *ofs; + for (; *ofs < PyUnicode_GET_LENGTH(str); *ofs += 1) { + Py_UCS4 ch; + + /* Loop invariants. */ + assert(nested_depth >= 0); + assert(*ofs >= expr_start); + if (quote_char) + assert(string_type == 1 || string_type == 3); + else + assert(string_type == 0); + + ch = PyUnicode_READ(kind, data, *ofs); + if (quote_char) { + /* We're inside a string. See if we're at the end. */ + /* This code needs to implement the same non-error logic + as tok_get from tokenizer.c, at the letter_quote + label. To actually share that code would be a + nightmare. But, it's unlikely to change and is small, + so duplicate it here. Note we don't need to catch all + of the errors, since they'll be caught when parsing the + expression. We just need to match the non-error + cases. Thus we can ignore \n in single-quoted strings, + for example. Or non-terminated strings. */ + if (ch == quote_char) { + /* Does this match the string_type (single or triple + quoted)? */ + if (string_type == 3) { + if (*ofs+2 < PyUnicode_GET_LENGTH(str) && + PyUnicode_READ(kind, data, *ofs+1) == ch && + PyUnicode_READ(kind, data, *ofs+2) == ch) { + /* We're at the end of a triple quoted string. */ + *ofs += 2; + string_type = 0; + quote_char = 0; + continue; + } + } else { + /* We're at the end of a normal string. */ + quote_char = 0; + string_type = 0; + continue; + } + } + /* We're inside a string, and not finished with the + string. If this is a backslash, skip the next char (it + might be an end quote that needs skipping). Otherwise, + just consume this character normally. */ + if (ch == '\\' && *ofs+1 < PyUnicode_GET_LENGTH(str)) { + /* Just skip the next char, whatever it is. */ + *ofs += 1; + } + } else if (ch == '\'' || ch == '"') { + /* Is this a triple quoted string? */ + if (*ofs+2 < PyUnicode_GET_LENGTH(str) && + PyUnicode_READ(kind, data, *ofs+1) == ch && + PyUnicode_READ(kind, data, *ofs+2) == ch) { + string_type = 3; + *ofs += 2; + } else { + /* Start of a normal string. */ + string_type = 1; + } + /* Start looking for the end of the string. */ + quote_char = ch; + } else if (ch == '[' || ch == '{' || ch == '(') { + nested_depth++; + } else if (nested_depth != 0 && + (ch == ']' || ch == '}' || ch == ')')) { + nested_depth--; + } else if (ch == '#') { + /* Error: can't include a comment character, inside parens + or not. */ + ast_error(c, n, "f-string cannot include '#'"); + return -1; + } else if (nested_depth == 0 && + (ch == '!' || ch == ':' || ch == '}')) { + /* First, test for the special case of "!=". Since '=' is + not an allowed conversion character, nothing is lost in + this test. */ + if (ch == '!' && *ofs+1 < PyUnicode_GET_LENGTH(str) && + PyUnicode_READ(kind, data, *ofs+1) == '=') + /* This isn't a conversion character, just continue. */ + continue; + + /* Normal way out of this loop. */ + break; + } else { + /* Just consume this char and loop around. */ + } + } + expr_end = *ofs; + /* If we leave this loop in a string or with mismatched parens, we + don't care. We'll get a syntax error when compiling the + expression. But, we can produce a better error message, so + let's just do that.*/ + if (quote_char) { + ast_error(c, n, "f-string: unterminated string"); + return -1; + } + if (nested_depth) { + ast_error(c, n, "f-string: mismatched '(', '{', or '['"); + return -1; + } + + if (*ofs >= PyUnicode_GET_LENGTH(str)) + goto unexpected_end_of_string; + + /* Compile the expression as soon as possible, so we show errors + related to the expression before errors related to the + conversion or format_spec. */ + simple_expression = fstring_compile_expr(str, expr_start, expr_end, c, n); + if (!simple_expression) + return -1; + + /* Check for a conversion char, if present. */ + if (PyUnicode_READ(kind, data, *ofs) == '!') { + *ofs += 1; + if (*ofs >= PyUnicode_GET_LENGTH(str)) + goto unexpected_end_of_string; + + conversion = PyUnicode_READ(kind, data, *ofs); + *ofs += 1; + + /* Validate the conversion. */ + if (!(conversion == 's' || conversion == 'r' + || conversion == 'a')) { + ast_error(c, n, "f-string: invalid conversion character: " + "expected 's', 'r', or 'a'"); + return -1; + } + } + + /* Check for the format spec, if present. */ + if (*ofs >= PyUnicode_GET_LENGTH(str)) + goto unexpected_end_of_string; + if (PyUnicode_READ(kind, data, *ofs) == ':') { + *ofs += 1; + if (*ofs >= PyUnicode_GET_LENGTH(str)) + goto unexpected_end_of_string; + + /* Parse the format spec. */ + format_spec = fstring_parse(str, ofs, recurse_lvl+1, c, n); + if (!format_spec) + return -1; + } + + if (*ofs >= PyUnicode_GET_LENGTH(str) || + PyUnicode_READ(kind, data, *ofs) != '}') + goto unexpected_end_of_string; + + /* We're at a right brace. Consume it. */ + assert(*ofs < PyUnicode_GET_LENGTH(str)); + assert(PyUnicode_READ(kind, data, *ofs) == '}'); + *ofs += 1; + + /* And now create the FormattedValue node that represents this entire + expression with the conversion and format spec. */ + *expression = FormattedValue(simple_expression, (int)conversion, + format_spec, LINENO(n), n->n_col_offset, + c->c_arena); + if (!*expression) + return -1; + + return 0; + +unexpected_end_of_string: + ast_error(c, n, "f-string: expecting '}'"); + return -1; +} + +/* Return -1 on error. + + Return 0 if we have a literal (possible zero length) and an + expression (zero length if at the end of the string. + + Return 1 if we have a literal, but no expression, and we want the + caller to call us again. This is used to deal with doubled + braces. + + When called multiple times on the string 'a{{b{0}c', this function + will return: + + 1. the literal 'a{' with no expression, and a return value + of 1. Despite the fact that there's no expression, the return + value of 1 means we're not finished yet. + + 2. the literal 'b' and the expression '0', with a return value of + 0. The fact that there's an expression means we're not finished. + + 3. literal 'c' with no expression and a return value of 0. The + combination of the return value of 0 with no expression means + we're finished. +*/ +static int +fstring_find_literal_and_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl, + PyObject **literal, expr_ty *expression, + struct compiling *c, const node *n) +{ + int result; + + assert(*literal == NULL && *expression == NULL); + + /* Get any literal string. */ + result = fstring_find_literal(str, ofs, literal, recurse_lvl, c, n); + if (result < 0) + goto error; + + assert(result == 0 || result == 1); + + if (result == 1) + /* We have a literal, but don't look at the expression. */ + return 1; + + assert(*ofs <= PyUnicode_GET_LENGTH(str)); + + if (*ofs >= PyUnicode_GET_LENGTH(str) || + PyUnicode_READ_CHAR(str, *ofs) == '}') + /* We're at the end of the string or the end of a nested + f-string: no expression. The top-level error case where we + expect to be at the end of the string but we're at a '}' is + handled later. */ + return 0; + + /* We must now be the start of an expression, on a '{'. */ + assert(*ofs < PyUnicode_GET_LENGTH(str) && + PyUnicode_READ_CHAR(str, *ofs) == '{'); + + if (fstring_find_expr(str, ofs, recurse_lvl, expression, c, n) < 0) + goto error; + + return 0; + +error: - Py_XDECREF(*literal); - *literal = NULL; ++ Py_CLEAR(*literal); + return -1; +} + +#define EXPRLIST_N_CACHED 64 + +typedef struct { + /* Incrementally build an array of expr_ty, so be used in an + asdl_seq. Cache some small but reasonably sized number of + expr_ty's, and then after that start dynamically allocating, + doubling the number allocated each time. Note that the f-string + f'{0}a{1}' contains 3 expr_ty's: 2 FormattedValue's, and one + Str for the literal 'a'. So you add expr_ty's about twice as + fast as you add exressions in an f-string. */ + + Py_ssize_t allocated; /* Number we've allocated. */ + Py_ssize_t size; /* Number we've used. */ + expr_ty *p; /* Pointer to the memory we're actually + using. Will point to 'data' until we + start dynamically allocating. */ + expr_ty data[EXPRLIST_N_CACHED]; +} ExprList; + +#ifdef NDEBUG +#define ExprList_check_invariants(l) +#else +static void +ExprList_check_invariants(ExprList *l) +{ + /* Check our invariants. Make sure this object is "live", and + hasn't been deallocated. */ + assert(l->size >= 0); + assert(l->p != NULL); + if (l->size <= EXPRLIST_N_CACHED) + assert(l->data == l->p); +} +#endif + +static void +ExprList_Init(ExprList *l) +{ + l->allocated = EXPRLIST_N_CACHED; + l->size = 0; + + /* Until we start allocating dynamically, p points to data. */ + l->p = l->data; + + ExprList_check_invariants(l); +} + +static int +ExprList_Append(ExprList *l, expr_ty exp) +{ + ExprList_check_invariants(l); + if (l->size >= l->allocated) { + /* We need to alloc (or realloc) the memory. */ + Py_ssize_t new_size = l->allocated * 2; + + /* See if we've ever allocated anything dynamically. */ + if (l->p == l->data) { + Py_ssize_t i; + /* We're still using the cached data. Switch to + alloc-ing. */ + l->p = PyMem_RawMalloc(sizeof(expr_ty) * new_size); + if (!l->p) + return -1; + /* Copy the cached data into the new buffer. */ + for (i = 0; i < l->size; i++) + l->p[i] = l->data[i]; + } else { + /* Just realloc. */ + expr_ty *tmp = PyMem_RawRealloc(l->p, sizeof(expr_ty) * new_size); + if (!tmp) { + PyMem_RawFree(l->p); + l->p = NULL; + return -1; + } + l->p = tmp; + } + + l->allocated = new_size; + assert(l->allocated == 2 * l->size); + } + + l->p[l->size++] = exp; + + ExprList_check_invariants(l); + return 0; +} + +static void +ExprList_Dealloc(ExprList *l) +{ + ExprList_check_invariants(l); + + /* If there's been an error, or we've never dynamically allocated, + do nothing. */ + if (!l->p || l->p == l->data) { + /* Do nothing. */ + } else { + /* We have dynamically allocated. Free the memory. */ + PyMem_RawFree(l->p); + } + l->p = NULL; + l->size = -1; +} + +static asdl_seq * +ExprList_Finish(ExprList *l, PyArena *arena) +{ + asdl_seq *seq; + + ExprList_check_invariants(l); + + /* Allocate the asdl_seq and copy the expressions in to it. */ + seq = _Py_asdl_seq_new(l->size, arena); + if (seq) { + Py_ssize_t i; + for (i = 0; i < l->size; i++) + asdl_seq_SET(seq, i, l->p[i]); + } + ExprList_Dealloc(l); + return seq; +} + +/* The FstringParser is designed to add a mix of strings and + f-strings, and concat them together as needed. Ultimately, it + generates an expr_ty. */ +typedef struct { + PyObject *last_str; + ExprList expr_list; +} FstringParser; + +#ifdef NDEBUG +#define FstringParser_check_invariants(state) +#else +static void +FstringParser_check_invariants(FstringParser *state) +{ + if (state->last_str) + assert(PyUnicode_CheckExact(state->last_str)); + ExprList_check_invariants(&state->expr_list); +} +#endif + +static void +FstringParser_Init(FstringParser *state) +{ + state->last_str = NULL; + ExprList_Init(&state->expr_list); + FstringParser_check_invariants(state); +} + +static void +FstringParser_Dealloc(FstringParser *state) +{ + FstringParser_check_invariants(state); + + Py_XDECREF(state->last_str); + ExprList_Dealloc(&state->expr_list); +} + +/* Make a Str node, but decref the PyUnicode object being added. */ +static expr_ty +make_str_node_and_del(PyObject **str, struct compiling *c, const node* n) +{ + PyObject *s = *str; + *str = NULL; + assert(PyUnicode_CheckExact(s)); + if (PyArena_AddPyObject(c->c_arena, s) < 0) { + Py_DECREF(s); + return NULL; + } + return Str(s, LINENO(n), n->n_col_offset, c->c_arena); +} + +/* Add a non-f-string (that is, a regular literal string). str is + decref'd. */ +static int +FstringParser_ConcatAndDel(FstringParser *state, PyObject *str) +{ + FstringParser_check_invariants(state); + + assert(PyUnicode_CheckExact(str)); + + if (PyUnicode_GET_LENGTH(str) == 0) { + Py_DECREF(str); + return 0; + } + + if (!state->last_str) { + /* We didn't have a string before, so just remember this one. */ + state->last_str = str; + } else { + /* Concatenate this with the previous string. */ - PyObject *temp = PyUnicode_Concat(state->last_str, str); - Py_DECREF(state->last_str); - Py_DECREF(str); - state->last_str = temp; - if (!temp) ++ PyUnicode_AppendAndDel(&state->last_str, str); ++ if (!state->last_str) + return -1; + } + FstringParser_check_invariants(state); + return 0; +} + +/* Parse an f-string. The f-string is in str, starting at ofs, with no 'f' + or quotes. str is not decref'd, since we don't know if it's used elsewhere. + And if we're only looking at a part of a string, then decref'ing is + definitely not the right thing to do! */ +static int +FstringParser_ConcatFstring(FstringParser *state, PyObject *str, + Py_ssize_t *ofs, int recurse_lvl, + struct compiling *c, const node *n) +{ + FstringParser_check_invariants(state); + + /* Parse the f-string. */ + while (1) { + PyObject *literal = NULL; + expr_ty expression = NULL; + + /* If there's a zero length literal in front of the + expression, literal will be NULL. If we're at the end of + the f-string, expression will be NULL (unless result == 1, + see below). */ + int result = fstring_find_literal_and_expr(str, ofs, recurse_lvl, + &literal, &expression, + c, n); + if (result < 0) + return -1; + + /* Add the literal, if any. */ + if (!literal) { + /* Do nothing. Just leave last_str alone (and possibly + NULL). */ + } else if (!state->last_str) { + state->last_str = literal; + literal = NULL; + } else { + /* We have a literal, concatenate it. */ + assert(PyUnicode_GET_LENGTH(literal) != 0); + if (FstringParser_ConcatAndDel(state, literal) < 0) + return -1; + literal = NULL; + } + assert(!state->last_str || + PyUnicode_GET_LENGTH(state->last_str) != 0); + + /* We've dealt with the literal now. It can't be leaked on further + errors. */ + assert(literal == NULL); + + /* See if we should just loop around to get the next literal + and expression, while ignoring the expression this + time. This is used for un-doubling braces, as an + optimization. */ + if (result == 1) + continue; + + if (!expression) + /* We're done with this f-string. */ + break; + + /* We know we have an expression. Convert any existing string + to a Str node. */ + if (!state->last_str) { + /* Do nothing. No previous literal. */ + } else { + /* Convert the existing last_str literal to a Str node. */ + expr_ty str = make_str_node_and_del(&state->last_str, c, n); + if (!str || ExprList_Append(&state->expr_list, str) < 0) + return -1; + } + + if (ExprList_Append(&state->expr_list, expression) < 0) + return -1; + } + + assert(*ofs <= PyUnicode_GET_LENGTH(str)); + + /* If recurse_lvl is zero, then we must be at the end of the + string. Otherwise, we must be at a right brace. */ + + if (recurse_lvl == 0 && *ofs < PyUnicode_GET_LENGTH(str)) { + ast_error(c, n, "f-string: unexpected end of string"); + return -1; + } + if (recurse_lvl != 0 && PyUnicode_READ_CHAR(str, *ofs) != '}') { + ast_error(c, n, "f-string: expecting '}'"); + return -1; + } + + FstringParser_check_invariants(state); + return 0; +} + +/* Convert the partial state reflected in last_str and expr_list to an + expr_ty. The expr_ty can be a Str, or a JoinedStr. */ +static expr_ty +FstringParser_Finish(FstringParser *state, struct compiling *c, + const node *n) +{ + asdl_seq *seq; + + FstringParser_check_invariants(state); + + /* If we're just a constant string with no expressions, return + that. */ + if(state->expr_list.size == 0) { + if (!state->last_str) { + /* Create a zero length string. */ + state->last_str = PyUnicode_FromStringAndSize(NULL, 0); + if (!state->last_str) + goto error; + } + return make_str_node_and_del(&state->last_str, c, n); + } + + /* Create a Str node out of last_str, if needed. It will be the + last node in our expression list. */ + if (state->last_str) { + expr_ty str = make_str_node_and_del(&state->last_str, c, n); + if (!str || ExprList_Append(&state->expr_list, str) < 0) + goto error; + } + /* This has already been freed. */ + assert(state->last_str == NULL); + + seq = ExprList_Finish(&state->expr_list, c->c_arena); + if (!seq) + goto error; + + /* If there's only one expression, return it. Otherwise, we need + to join them together. */ + if (seq->size == 1) + return seq->elements[0]; + + return JoinedStr(seq, LINENO(n), n->n_col_offset, c->c_arena); + +error: + FstringParser_Dealloc(state); + return NULL; +} + +/* Given an f-string (with no 'f' or quotes) that's in str starting at + ofs, parse it into an expr_ty. Return NULL on error. Does not + decref str. */ +static expr_ty +fstring_parse(PyObject *str, Py_ssize_t *ofs, int recurse_lvl, + struct compiling *c, const node *n) +{ + FstringParser state; + + FstringParser_Init(&state); + if (FstringParser_ConcatFstring(&state, str, ofs, recurse_lvl, + c, n) < 0) { + FstringParser_Dealloc(&state); + return NULL; + } + + return FstringParser_Finish(&state, c, n); +} + +/* n is a Python string literal, including the bracketing quote + characters, and r, b, u, &/or f prefixes (if any), and embedded + escape sequences (if any). parsestr parses it, and returns the + decoded Python string object. If the string is an f-string, set + *fmode and return the unparsed string object. +*/ static PyObject * -parsestr(struct compiling *c, const node *n, int *bytesmode) +parsestr(struct compiling *c, const node *n, int *bytesmode, int *fmode) { size_t len; const char *s = STR(n);