self.assertEqual(out.getvalue().splitlines(),
['literal 102 ', 'literal 111 ', 'literal 111 '])
+ def test_keyword_parameters(self):
+ # Issue #20283: Accepting the string keyword parameter.
+ # Every call below passes the subject through the 'string' keyword.
+ # In 'abracadabra' the text 'ab' sits at offsets 0-2 and 7-9; the
+ # pos/endpos windows used here all select the second occurrence.
+ pat = re.compile(r'(ab)')
+ self.assertEqual(
+ pat.match(string='abracadabra', pos=7, endpos=10).span(), (7, 9))
++ self.assertEqual(
++ pat.fullmatch(string='abracadabra', pos=7, endpos=9).span(), (7, 9))
+ self.assertEqual(
+ pat.search(string='abracadabra', pos=3, endpos=10).span(), (7, 9))
+ self.assertEqual(
+ pat.findall(string='abracadabra', pos=3, endpos=10), ['ab'])
+ # No pos/endpos is passed to split(); the group in the pattern is
+ # why the separator 'ab' itself shows up in the expected list.
+ self.assertEqual(
+ pat.split(string='abracadabra', maxsplit=1),
+ ['', 'ab', 'racadabra'])
+ self.assertEqual(
+ pat.scanner(string='abracadabra', pos=3, endpos=10).search().span(),
+ (7, 9))
+
+# Checks the text produced by repr() on compiled pattern objects:
+# "re.compile(<pattern>)" optionally followed by ", <flags>".
+class PatternReprTests(unittest.TestCase):
+ # Compile `pattern` with no explicit flags and compare its repr.
+ def check(self, pattern, expected):
+ self.assertEqual(repr(re.compile(pattern)), expected)
+
+ # Same as check(), but compile with the given `flags` value.
+ def check_flags(self, pattern, flags, expected):
+ self.assertEqual(repr(re.compile(pattern, flags)), expected)
+
+ def test_without_flags(self):
+ self.check('random pattern',
+ "re.compile('random pattern')")
+
+ def test_single_flag(self):
+ self.check_flags('random pattern', re.IGNORECASE,
+ "re.compile('random pattern', re.IGNORECASE)")
+
+ def test_multiple_flags(self):
+ # Short aliases (re.I, re.S, re.X) are expected to be shown by
+ # their long names, joined with '|'.
+ self.check_flags('random pattern', re.I|re.S|re.X,
+ "re.compile('random pattern', "
+ "re.IGNORECASE|re.DOTALL|re.VERBOSE)")
+
+ def test_unicode_flag(self):
+ # For str patterns re.U is expected to be left out of the repr,
+ # both on its own and when combined with other flags.
+ self.check_flags('random pattern', re.U,
+ "re.compile('random pattern')")
+ self.check_flags('random pattern', re.I|re.S|re.U,
+ "re.compile('random pattern', "
+ "re.IGNORECASE|re.DOTALL)")
+
+ def test_inline_flags(self):
+ # A flag set inside the pattern text must appear in the repr too.
+ self.check('(?i)pattern',
+ "re.compile('(?i)pattern', re.IGNORECASE)")
+
+ def test_unknown_flags(self):
+ # Bits without a known name are expected as one hex literal,
+ # placed after any named flags.
+ self.check_flags('random pattern', 0x123000,
+ "re.compile('random pattern', 0x123000)")
+ self.check_flags('random pattern', 0x123000|re.I,
+ "re.compile('random pattern', re.IGNORECASE|0x123000)")
+
+ def test_bytes(self):
+ self.check(b'bytes pattern',
+ "re.compile(b'bytes pattern')")
+ self.check_flags(b'bytes pattern', re.A,
+ "re.compile(b'bytes pattern', re.ASCII)")
+
+ def test_quotes(self):
+ # The pattern part follows the usual repr() quoting rules: the
+ # quote style adapts, and single quotes are escaped when both
+ # kinds are present.
+ self.check('random "double quoted" pattern',
+ '''re.compile('random "double quoted" pattern')''')
+ self.check("random 'single quoted' pattern",
+ '''re.compile("random 'single quoted' pattern")''')
+ self.check('''both 'single' and "double" quotes''',
+ '''re.compile('both \\'single\\' and "double" quotes')''')
+
+ def test_long_pattern(self):
+ # The pattern is several thousand characters long; the repr must
+ # stay under 300 characters, keep the start of the pattern, and
+ # (when flags are given) still end with the flag list.
+ pattern = 'Very %spattern' % ('long ' * 1000)
+ r = repr(re.compile(pattern))
+ self.assertLess(len(r), 300)
+ self.assertEqual(r[:30], "re.compile('Very long long lon")
+ r = repr(re.compile(pattern, re.I))
+ self.assertLess(len(r), 300)
+ self.assertEqual(r[:30], "re.compile('Very long long lon")
+ self.assertEqual(r[-16:], ", re.IGNORECASE)")
+
+
+class ImplementationTest(unittest.TestCase):
+ """
+ Test implementation details of the re module.
+ """
+
+ def test_overlap_table(self):
+ # Exercises the private helper sre_compile._generate_overlap_table.
+ # In every case below, entry i of the expected list equals the
+ # length of the longest proper prefix of the input that is also a
+ # suffix of input[:i+1] (e.g. "aaaa" -> [0, 1, 2, 3]).
+ # NOTE(review): the helper's body is not visible here; this reading
+ # is derived from the expected values only.
+ f = sre_compile._generate_overlap_table
+ self.assertEqual(f(""), [])
+ self.assertEqual(f("a"), [0])
+ self.assertEqual(f("abcd"), [0, 0, 0, 0])
+ self.assertEqual(f("aaaa"), [0, 1, 2, 3])
+ self.assertEqual(f("ababba"), [0, 0, 1, 2, 0, 1])
+ self.assertEqual(f("abcabdac"), [0, 0, 0, 1, 2, 0, 1, 0])
+
+
def run_re_tests():
from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
if verbose:
PyObject_DEL(self);
}
-static PyObject*
+/* Dispatch a match attempt to the engine variant selected by
+   state->charsize: 1 -> sre_ucs1_match, 2 -> sre_ucs2_match, and anything
+   else is asserted to be 4 -> sre_ucs4_match.  The callee's return value
+   is passed through unchanged. */
+LOCAL(Py_ssize_t)
+sre_match(SRE_STATE* state, SRE_CODE* pattern)
+{
+ if (state->charsize == 1)
+ return sre_ucs1_match(state, pattern);
+ if (state->charsize == 2)
+ return sre_ucs2_match(state, pattern);
+ /* Only 1, 2 and 4 are expected; the assert documents that. */
+ assert(state->charsize == 4);
+ return sre_ucs4_match(state, pattern);
+}
+
+/* Dispatch a search to the engine variant selected by state->charsize:
+   1 -> sre_ucs1_search, 2 -> sre_ucs2_search, and anything else is
+   asserted to be 4 -> sre_ucs4_search.  The callee's return value is
+   passed through unchanged. */
+LOCAL(Py_ssize_t)
+sre_search(SRE_STATE* state, SRE_CODE* pattern)
+{
+ if (state->charsize == 1)
+ return sre_ucs1_search(state, pattern);
+ if (state->charsize == 2)
+ return sre_ucs2_search(state, pattern);
+ /* Only 1, 2 and 4 are expected; the assert documents that. */
+ assert(state->charsize == 4);
+ return sre_ucs4_search(state, pattern);
+}
+
- /*[clinic input]
- module _sre
- class _sre.SRE_Pattern "PatternObject *" "&Pattern_Type"
-
- _sre.SRE_Pattern.match as pattern_match
-
- pattern: object
- pos: Py_ssize_t = 0
- endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
-
- Matches zero or more characters at the beginning of the string.
- [clinic start generated code]*/
-
- PyDoc_STRVAR(pattern_match__doc__,
- "match($self, /, pattern, pos=0, endpos=sys.maxsize)\n"
- "--\n"
- "\n"
- "Matches zero or more characters at the beginning of the string.");
-
- #define PATTERN_MATCH_METHODDEF \
- {"match", (PyCFunction)pattern_match, METH_VARARGS|METH_KEYWORDS, pattern_match__doc__},
-
+/* Pick the subject argument when it can arrive under two names.
+
+   string  - value parsed for the current 'string' parameter, or NULL.
+   string2 - value parsed for the deprecated keyword, or NULL.
+   oldname - the deprecated keyword's name, used only in messages.
+
+   Returns one of the two input pointers as-is (no reference count change
+   is made here), or NULL after setting/propagating an error:
+     - both given        -> TypeError
+     - neither given     -> TypeError
+     - only string2      -> DeprecationWarning is issued; NULL is returned
+                            if issuing the warning reports failure. */
+static PyObject *
- pattern_match_impl(PatternObject *self, PyObject *pattern, Py_ssize_t pos, Py_ssize_t endpos);
+ fix_string_param(PyObject *string, PyObject *string2, const char *oldname)
+ {
+ if (string2 != NULL) {
+ /* The deprecated keyword was used. */
+ if (string != NULL) {
+ PyErr_Format(PyExc_TypeError,
+ "Argument given by name ('%s') and position (1)",
+ oldname);
+ return NULL;
+ }
+ if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
+ "The '%s' keyword parameter name is deprecated. "
+ "Use 'string' instead.", oldname) < 0)
+ return NULL;
+ return string2;
+ }
+ /* Both parameters are optional for the argument parser, so the
+ "required" check has to be done by hand here. */
+ if (string == NULL) {
+ PyErr_SetString(PyExc_TypeError,
+ "Required argument 'string' (pos 1) not found");
+ return NULL;
+ }
+ return string;
+ }
- PyObject *return_value = NULL;
- static char *_keywords[] = {"pattern", "pos", "endpos", NULL};
- PyObject *pattern;
+/* Pattern.match(string, pos=0, endpos=PY_SSIZE_T_MAX)
+
+   Parses the arguments (also accepting the deprecated keyword 'pattern'
+   as an alias for 'string'), initialises the matching state, runs the
+   match engine from state.start and wraps the outcome with
+   pattern_new_match().
+
+   Returns whatever pattern_new_match() produces, or NULL with an
+   exception set when argument handling, state initialisation or the
+   engine reports an error. */
+static PyObject *
+pattern_match(PatternObject *self, PyObject *args, PyObject *kwargs)
+{
-
- if (!PyArg_ParseTupleAndKeywords(args, kwargs,
- "O|nn:match", _keywords,
- &pattern, &pos, &endpos))
- goto exit;
- return_value = pattern_match_impl(self, pattern, pos, endpos);
-
- exit:
- return return_value;
- }
-
- static PyObject *
- pattern_match_impl(PatternObject *self, PyObject *pattern, Py_ssize_t pos, Py_ssize_t endpos)
- /*[clinic end generated code: output=1528eafdb8b025ad input=26f9fd31befe46b9]*/
- {
++ static char *_keywords[] = {"string", "pos", "endpos", "pattern", NULL};
++ PyObject *string = NULL;
+ Py_ssize_t pos = 0;
+ Py_ssize_t endpos = PY_SSIZE_T_MAX;
- PyObject *string;
++ PyObject *pattern = NULL;
+ PyObject *match;
+ SRE_STATE state;
+ Py_ssize_t status;
- string = state_init(&state, (PatternObject *)self, pattern, pos, endpos);
+
++ if (!PyArg_ParseTupleAndKeywords(args, kwargs,
++ "|Onn$O:match", _keywords,
++ &string, &pos, &endpos, &pattern))
++ return NULL;
++ string = fix_string_param(string, pattern, "pattern");
++ if (!string)
++ return NULL;
++ string = state_init(&state, (PatternObject *)self, string, pos, endpos);
+ if (!string)
+ return NULL;
+
+ state.ptr = state.start;
+
+ TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
+
+ status = sre_match(&state, PatternObject_GetCode(self));
+
+ TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
+ if (PyErr_Occurred()) {
+ /* state_init() succeeded above, so the state must be released on
+ this path too; returning without state_fini() would leak it. */
+ state_fini(&state);
+ return NULL;
+ }
+
+ /* Build the result while the state is still intact, then release it:
+ pattern_new_match() reads from &state. */
+ match = (PyObject *)pattern_new_match(self, &state, status);
+ state_fini(&state);
+
+ return match;
+}
+
static PyObject*
-pattern_match(PatternObject* self, PyObject* args, PyObject* kw)
+pattern_fullmatch(PatternObject* self, PyObject* args, PyObject* kw)
{
SRE_STATE state;
- int status;
+ Py_ssize_t status;
- PyObject* string;
+ PyObject *string = NULL, *string2 = NULL;
Py_ssize_t start = 0;
Py_ssize_t end = PY_SSIZE_T_MAX;
- static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
- if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:fullmatch", kwlist,
- &string, &start, &end))
+ static char* kwlist[] = { "string", "pos", "endpos", "pattern", NULL };
- if (!PyArg_ParseTupleAndKeywords(args, kw, "|Onn$O:match", kwlist,
++ if (!PyArg_ParseTupleAndKeywords(args, kw, "|Onn$O:fullmatch", kwlist,
+ &string, &start, &end, &string2))
+ return NULL;
+
+ string = fix_string_param(string, string2, "pattern");
+ if (!string)
return NULL;
string = state_init(&state, self, string, start, end);
pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
{
SRE_STATE state;
- int status;
+ Py_ssize_t status;
- PyObject* string;
+ PyObject *string = NULL, *string2 = NULL;
Py_ssize_t start = 0;
Py_ssize_t end = PY_SSIZE_T_MAX;
- static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
- if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:search", kwlist,
- &string, &start, &end))
+ static char* kwlist[] = { "string", "pos", "endpos", "pattern", NULL };
+ if (!PyArg_ParseTupleAndKeywords(args, kw, "|Onn$O:search", kwlist,
+ &string, &start, &end, &string2))
+ return NULL;
+
+ string = fix_string_param(string, string2, "pattern");
+ if (!string)
return NULL;
string = state_init(&state, self, string, start, end);
{
SRE_STATE state;
PyObject* list;
- int status;
+ Py_ssize_t status;
Py_ssize_t i, b, e;
- PyObject* string;
+ PyObject *string = NULL, *string2 = NULL;
Py_ssize_t start = 0;
Py_ssize_t end = PY_SSIZE_T_MAX;
- static char* kwlist[] = { "source", "pos", "endpos", NULL };
- if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:findall", kwlist,
- &string, &start, &end))
+ static char* kwlist[] = { "string", "pos", "endpos", "source", NULL };
+ if (!PyArg_ParseTupleAndKeywords(args, kw, "|Onn$O:findall", kwlist,
+ &string, &start, &end, &string2))
+ return NULL;
+
+ string = fix_string_param(string, string2, "source");
+ if (!string)
return NULL;
string = state_init(&state, self, string, start, end);
#endif
}
+/* repr() implementation for compiled pattern objects.
+
+   Produces "re.compile(<pattern>)" when no flag remains to be shown and
+   "re.compile(<pattern>, <flags>)" otherwise.  <pattern> is formatted with
+   %.200R; <flags> is the '|'-joined list of the names of the set flags, in
+   flag_names[] order, followed by any remaining bits as one hex number.
+
+   Returns the new string object, or NULL if any step fails. */
+static PyObject *
+pattern_repr(PatternObject *obj)
+{
+ /* Name/bit pairs; the array order is the order used in the output. */
+ static const struct {
+ const char *name;
+ int value;
+ } flag_names[] = {
+ {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
+ {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
+ {"re.LOCALE", SRE_FLAG_LOCALE},
+ {"re.MULTILINE", SRE_FLAG_MULTILINE},
+ {"re.DOTALL", SRE_FLAG_DOTALL},
+ {"re.UNICODE", SRE_FLAG_UNICODE},
+ {"re.VERBOSE", SRE_FLAG_VERBOSE},
+ {"re.DEBUG", SRE_FLAG_DEBUG},
+ {"re.ASCII", SRE_FLAG_ASCII},
+ };
+ PyObject *result = NULL;
+ PyObject *flag_items;
+ int i;
+ /* Work on a copy so bits can be cleared as they are reported. */
+ int flags = obj->flags;
+
+ /* Omit re.UNICODE for valid string patterns. */
+ /* i.e. not a bytes pattern, and of LOCALE/UNICODE/ASCII only UNICODE
+ is set. */
+ if (obj->isbytes == 0 &&
+ (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
+ SRE_FLAG_UNICODE)
+ flags &= ~SRE_FLAG_UNICODE;
+
+ flag_items = PyList_New(0);
+ if (!flag_items)
+ return NULL;
+
+ /* Collect the name of every known flag that is set. */
+ for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
+ if (flags & flag_names[i].value) {
+ PyObject *item = PyUnicode_FromString(flag_names[i].name);
+ if (!item)
+ goto done;
+
+ if (PyList_Append(flag_items, item) < 0) {
+ Py_DECREF(item);
+ goto done;
+ }
+ /* The list holds its own reference now. */
+ Py_DECREF(item);
+ /* Clear the bit so only unnamed bits are left afterwards. */
+ flags &= ~flag_names[i].value;
+ }
+ }
+ /* Whatever is still set has no name in the table: show it in hex. */
+ if (flags) {
+ PyObject *item = PyUnicode_FromFormat("0x%x", flags);
+ if (!item)
+ goto done;
+
+ if (PyList_Append(flag_items, item) < 0) {
+ Py_DECREF(item);
+ goto done;
+ }
+ Py_DECREF(item);
+ }
+
+ if (PyList_Size(flag_items) > 0) {
+ PyObject *flags_result;
+ PyObject *sep = PyUnicode_FromString("|");
+ if (!sep)
+ goto done;
+ flags_result = PyUnicode_Join(sep, flag_items);
+ Py_DECREF(sep);
+ if (!flags_result)
+ goto done;
+ result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
+ obj->pattern, flags_result);
+ Py_DECREF(flags_result);
+ }
+ else {
+ result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
+ }
+
+ /* Shared exit: result is still NULL if an earlier step failed. */
+done:
+ Py_DECREF(flag_items);
+ return result;
+}
+
+ PyDoc_STRVAR(pattern_match_doc,
+ "match(string[, pos[, endpos]]) -> match object or None.\n\
+ Matches zero or more characters at the beginning of the string");
+
+PyDoc_STRVAR(pattern_fullmatch_doc,
+"fullmatch(string[, pos[, endpos]]) -> match object or None.\n\
+ Matches against all of the string");
+
PyDoc_STRVAR(pattern_search_doc,
"search(string[, pos[, endpos]]) -> match object or None.\n\
Scan through string looking for a match, and return a corresponding\n\
PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
static PyMethodDef pattern_methods[] = {
- PATTERN_MATCH_METHODDEF
+ {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS,
+ pattern_match_doc},
+ {"fullmatch", (PyCFunction) pattern_fullmatch, METH_VARARGS|METH_KEYWORDS,
+ pattern_fullmatch_doc},
{"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS,
pattern_search_doc},
{"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS,