From: Fredrik Lundh Date: Tue, 18 Sep 2001 20:55:24 +0000 (+0000) Subject: fixed #449964: sre.sub raises an exception if the template contains a X-Git-Tag: v2.2.1c1~1743 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=59b68656f846973840953220c4780f3558b59fb8;p=python fixed #449964: sre.sub raises an exception if the template contains a \g group reference followed by a character escape (also restructured a few things on the way to fixing #449000) --- diff --git a/Lib/sre.py b/Lib/sre.py index a87870eeef..7a640f90d9 100644 --- a/Lib/sre.py +++ b/Lib/sre.py @@ -251,11 +251,13 @@ def _subn(pattern, template, text, count=0, sub=0): else: template = _compile_repl(template, pattern) literals = template[1] - sub = 0 # temporarly disabled, see bug #449000 - if (sub and not count and pattern._isliteral() and - len(literals) == 1 and literals[0]): - # shortcut: both pattern and string are literals - return string.replace(text, pattern.pattern, literals[0]), 0 + if sub and not count: + literal = pattern._getliteral() + if literal and "\\" in literal: + literal = None # may contain untranslated escapes + if literal is not None and len(literals) == 1 and literals[0]: + # shortcut: both pattern and string are literals + return string.replace(text, pattern.pattern, literals[0]), 0 def filter(match, template=template): return sre_parse.expand_template(template, match) n = i = 0 diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py index af1edbf3d2..7d9b8899bc 100644 --- a/Lib/sre_parse.py +++ b/Lib/sre_parse.py @@ -647,9 +647,9 @@ def parse_template(source, pattern): p.append((LITERAL, literal)) sep = source[:0] if type(sep) is type(""): - char = chr + makechar = chr else: - char = unichr + makechar = unichr while 1: this = s.get() if this is None: @@ -693,14 +693,14 @@ def parse_template(source, pattern): break if not code: this = this[1:] - code = LITERAL, char(atoi(this[-6:], 8) & 0xff) + code = LITERAL, makechar(atoi(this[-6:], 8) & 0xff) if code[0] is LITERAL: literal(code[1]) else: a(code) else: try: - this = char(ESCAPES[this][1]) + this = makechar(ESCAPES[this][1]) except KeyError: pass literal(this) diff --git a/Lib/test/test_sre.py b/Lib/test/test_sre.py index 49fe4c64a9..4a71447582 100644 --- a/Lib/test/test_sre.py +++ b/Lib/test/test_sre.py @@ -104,6 +104,9 @@ test(r"""sre.sub(r'(?Px)', '\g\g<1>', 'xx')""", 'xxxx') test(r"""sre.sub(r'(?Px)', '\g\g', 'xx')""", 'xxxx') test(r"""sre.sub(r'(?Px)', '\g<1>\g<1>', 'xx')""", 'xxxx') +# bug 449964: fails for group followed by other escape +test(r"""sre.sub(r'(?Px)', '\g<1>\g<1>\\b', 'xx')""", 'xx\bxx\b') + test(r"""sre.sub(r'a', r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D', 'a')""", '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D') test(r"""sre.sub(r'a', '\t\n\v\r\f\a', 'a')""", '\t\n\v\r\f\a') test(r"""sre.sub(r'a', '\t\n\v\r\f\a', 'a')""", (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7))) diff --git a/Modules/_sre.c b/Modules/_sre.c index b0ab66308f..32cd48bcd4 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -31,7 +31,7 @@ * 2001-04-28 fl added __copy__ methods (work in progress) * 2001-05-14 fl fixes for 1.5.2 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis) - * 2001-09-18 fl + * 2001-09-18 fl added _getliteral helper * * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. * @@ -1959,25 +1959,29 @@ pattern_deepcopy(PatternObject* self, PyObject* args) } static PyObject* -pattern_isliteral(PatternObject* self, PyObject* args) +pattern_getliteral(PatternObject* self, PyObject* args) { - /* internal: return true if pattern consists of literal text only */ + /* internal: if the pattern is a literal string, return that + string. otherwise, return None */ SRE_CODE* code; - PyObject* isliteral; + PyObject* literal; - if (!PyArg_ParseTuple(args, ":_isliteral")) + if (!PyArg_ParseTuple(args, ":_getliteral")) return NULL; code = PatternObject_GetCode(self); - if (code[0] == SRE_OP_INFO && code[2] & SRE_INFO_LITERAL) - isliteral = Py_True; - else - isliteral = Py_False; + if (code[0] == SRE_OP_INFO && code[2] & SRE_INFO_LITERAL) { + /* FIXME: extract literal string from code buffer. we can't + use the pattern member, since it may contain untranslated + escape codes (see SF bug 449000) */ + literal = Py_None; + } else + literal = Py_None; /* no literal */ - Py_INCREF(isliteral); - return isliteral; + Py_INCREF(literal); + return literal; } static PyMethodDef pattern_methods[] = { @@ -1990,7 +1994,7 @@ static PyMethodDef pattern_methods[] = { {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS}, {"__copy__", (PyCFunction) pattern_copy, METH_VARARGS}, {"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_VARARGS}, - {"_isliteral", (PyCFunction) pattern_isliteral, METH_VARARGS}, + {"_getliteral", (PyCFunction) pattern_getliteral, METH_VARARGS}, {NULL, NULL} };