Issue #20283: RE pattern methods now accept the string keyword parameters
author Serhiy Storchaka <storchaka@gmail.com>
Thu, 6 Mar 2014 09:36:15 +0000 (11:36 +0200)
committer Serhiy Storchaka <storchaka@gmail.com>
Thu, 6 Mar 2014 09:36:15 +0000 (11:36 +0200)
as documented.  The pattern and source keyword parameters are left as
deprecated aliases.

1  2 
Lib/test/test_re.py
Misc/NEWS
Modules/_sre.c

diff --cc Lib/test/test_re.py
index a229e235ca202a22daa5411bcd483ae8ba221171,5466b2065395deeec65476226b5c6ba678eedb8a..33ccd15398302106e0556723f9bff0f85ed67596
@@@ -1205,84 -1076,23 +1205,102 @@@ class ReTests(unittest.TestCase)
          self.assertEqual(out.getvalue().splitlines(),
                           ['literal 102 ', 'literal 111 ', 'literal 111 '])
  
+     def test_keyword_parameters(self):
+         # Issue #20283: Accepting the string keyword parameter.
+         pat = re.compile(r'(ab)')
+         self.assertEqual(
+             pat.match(string='abracadabra', pos=7, endpos=10).span(), (7, 9))
++        self.assertEqual(
++            pat.fullmatch(string='abracadabra', pos=7, endpos=9).span(), (7, 9))
+         self.assertEqual(
+             pat.search(string='abracadabra', pos=3, endpos=10).span(), (7, 9))
+         self.assertEqual(
+             pat.findall(string='abracadabra', pos=3, endpos=10), ['ab'])
+         self.assertEqual(
+             pat.split(string='abracadabra', maxsplit=1),
+             ['', 'ab', 'racadabra'])
+         self.assertEqual(
+             pat.scanner(string='abracadabra', pos=3, endpos=10).search().span(),
+             (7, 9))
  
 +class PatternReprTests(unittest.TestCase):
 +    def check(self, pattern, expected):
 +        self.assertEqual(repr(re.compile(pattern)), expected)
 +
 +    def check_flags(self, pattern, flags, expected):
 +        self.assertEqual(repr(re.compile(pattern, flags)), expected)
 +
 +    def test_without_flags(self):
 +        self.check('random pattern',
 +                   "re.compile('random pattern')")
 +
 +    def test_single_flag(self):
 +        self.check_flags('random pattern', re.IGNORECASE,
 +            "re.compile('random pattern', re.IGNORECASE)")
 +
 +    def test_multiple_flags(self):
 +        self.check_flags('random pattern', re.I|re.S|re.X,
 +            "re.compile('random pattern', "
 +            "re.IGNORECASE|re.DOTALL|re.VERBOSE)")
 +
 +    def test_unicode_flag(self):
 +        self.check_flags('random pattern', re.U,
 +                         "re.compile('random pattern')")
 +        self.check_flags('random pattern', re.I|re.S|re.U,
 +                         "re.compile('random pattern', "
 +                         "re.IGNORECASE|re.DOTALL)")
 +
 +    def test_inline_flags(self):
 +        self.check('(?i)pattern',
 +                   "re.compile('(?i)pattern', re.IGNORECASE)")
 +
 +    def test_unknown_flags(self):
 +        self.check_flags('random pattern', 0x123000,
 +                         "re.compile('random pattern', 0x123000)")
 +        self.check_flags('random pattern', 0x123000|re.I,
 +            "re.compile('random pattern', re.IGNORECASE|0x123000)")
 +
 +    def test_bytes(self):
 +        self.check(b'bytes pattern',
 +                   "re.compile(b'bytes pattern')")
 +        self.check_flags(b'bytes pattern', re.A,
 +                         "re.compile(b'bytes pattern', re.ASCII)")
 +
 +    def test_quotes(self):
 +        self.check('random "double quoted" pattern',
 +            '''re.compile('random "double quoted" pattern')''')
 +        self.check("random 'single quoted' pattern",
 +            '''re.compile("random 'single quoted' pattern")''')
 +        self.check('''both 'single' and "double" quotes''',
 +            '''re.compile('both \\'single\\' and "double" quotes')''')
 +
 +    def test_long_pattern(self):
 +        pattern = 'Very %spattern' % ('long ' * 1000)
 +        r = repr(re.compile(pattern))
 +        self.assertLess(len(r), 300)
 +        self.assertEqual(r[:30], "re.compile('Very long long lon")
 +        r = repr(re.compile(pattern, re.I))
 +        self.assertLess(len(r), 300)
 +        self.assertEqual(r[:30], "re.compile('Very long long lon")
 +        self.assertEqual(r[-16:], ", re.IGNORECASE)")
 +
 +
 +class ImplementationTest(unittest.TestCase):
 +    """
 +    Test implementation details of the re module.
 +    """
 +
 +    def test_overlap_table(self):
 +        f = sre_compile._generate_overlap_table
 +        self.assertEqual(f(""), [])
 +        self.assertEqual(f("a"), [0])
 +        self.assertEqual(f("abcd"), [0, 0, 0, 0])
 +        self.assertEqual(f("aaaa"), [0, 1, 2, 3])
 +        self.assertEqual(f("ababba"), [0, 0, 1, 2, 0, 1])
 +        self.assertEqual(f("abcabdac"), [0, 0, 0, 1, 2, 0, 1, 0])
 +
 +
  def run_re_tests():
      from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
      if verbose:
diff --cc Misc/NEWS
index 1da5d6430f20a030345a0e1d5454fc0ec3aebb5b,62211f47113c993a041bfdbdd10c46ce887326d2..1b6516f6833133f69783ab1ef45c07f409628225
+++ b/Misc/NEWS
@@@ -20,10 -22,10 +20,14 @@@ Core and Builtin
  Library
  -------
  
+ - Issue #20283: RE pattern methods now accept the string keyword parameters
+   as documented.  The pattern and source keyword parameters are left as
+   deprecated aliases.
 +- Issue #20839: Don't trigger a DeprecationWarning in the still supported
 +  pkgutil.get_loader() API when __loader__ isn't set on a module (nor
 +  when pkgutil.find_loader() is called directly).
 +
  - Issue #20778: Fix modulefinder to work with bytecode-only modules.
  
  - Issue #20791: copy.copy() now doesn't make a copy when the input is
diff --cc Modules/_sre.c
index d4d1d9d0eb3c597c7244066e01a744fe017ba2ce,b1258eefc49771262ee27dc413e24080fa42b9b6..eb1106ad8055055134e1de8843941c9532964d26
@@@ -504,111 -1876,46 +504,105 @@@ pattern_dealloc(PatternObject* self
      PyObject_DEL(self);
  }
  
 -static PyObject*
 +LOCAL(Py_ssize_t)
 +sre_match(SRE_STATE* state, SRE_CODE* pattern)
 +{
 +    if (state->charsize == 1)
 +        return sre_ucs1_match(state, pattern);
 +    if (state->charsize == 2)
 +        return sre_ucs2_match(state, pattern);
 +    assert(state->charsize == 4);
 +    return sre_ucs4_match(state, pattern);
 +}
 +
 +LOCAL(Py_ssize_t)
 +sre_search(SRE_STATE* state, SRE_CODE* pattern)
 +{
 +    if (state->charsize == 1)
 +        return sre_ucs1_search(state, pattern);
 +    if (state->charsize == 2)
 +        return sre_ucs2_search(state, pattern);
 +    assert(state->charsize == 4);
 +    return sre_ucs4_search(state, pattern);
 +}
 +
- /*[clinic input]
- module _sre
- class _sre.SRE_Pattern "PatternObject *" "&Pattern_Type"
- _sre.SRE_Pattern.match as pattern_match
-     pattern: object
-     pos: Py_ssize_t = 0
-     endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
- Matches zero or more characters at the beginning of the string.
- [clinic start generated code]*/
- PyDoc_STRVAR(pattern_match__doc__,
- "match($self, /, pattern, pos=0, endpos=sys.maxsize)\n"
- "--\n"
- "\n"
- "Matches zero or more characters at the beginning of the string.");
- #define PATTERN_MATCH_METHODDEF    \
-     {"match", (PyCFunction)pattern_match, METH_VARARGS|METH_KEYWORDS, pattern_match__doc__},
 +static PyObject *
- pattern_match_impl(PatternObject *self, PyObject *pattern, Py_ssize_t pos, Py_ssize_t endpos);
+ fix_string_param(PyObject *string, PyObject *string2, const char *oldname)
+ {
+     if (string2 != NULL) {
+         if (string != NULL) {
+             PyErr_Format(PyExc_TypeError,
+                          "Argument given by name ('%s') and position (1)",
+                          oldname);
+             return NULL;
+         }
+         if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
+                              "The '%s' keyword parameter name is deprecated.  "
+                              "Use 'string' instead.", oldname) < 0)
+             return NULL;
+         return string2;
+     }
+     if (string == NULL) {
+         PyErr_SetString(PyExc_TypeError,
+                         "Required argument 'string' (pos 1) not found");
+         return NULL;
+     }
+     return string;
+ }
  
-     PyObject *return_value = NULL;
-     static char *_keywords[] = {"pattern", "pos", "endpos", NULL};
-     PyObject *pattern;
 +static PyObject *
 +pattern_match(PatternObject *self, PyObject *args, PyObject *kwargs)
 +{
-     if (!PyArg_ParseTupleAndKeywords(args, kwargs,
-         "O|nn:match", _keywords,
-         &pattern, &pos, &endpos))
-         goto exit;
-     return_value = pattern_match_impl(self, pattern, pos, endpos);
- exit:
-     return return_value;
- }
- static PyObject *
- pattern_match_impl(PatternObject *self, PyObject *pattern, Py_ssize_t pos, Py_ssize_t endpos)
- /*[clinic end generated code: output=1528eafdb8b025ad input=26f9fd31befe46b9]*/
- {
++    static char *_keywords[] = {"string", "pos", "endpos", "pattern", NULL};
++    PyObject *string = NULL;
 +    Py_ssize_t pos = 0;
 +    Py_ssize_t endpos = PY_SSIZE_T_MAX;
-     PyObject *string;
++    PyObject *pattern = NULL;
 +    SRE_STATE state;
 +    Py_ssize_t status;
-     string = state_init(&state, (PatternObject *)self, pattern, pos, endpos);
 +
++    if (!PyArg_ParseTupleAndKeywords(args, kwargs,
++        "|Onn$O:match", _keywords,
++        &string, &pos, &endpos, &pattern))
++        return NULL;
++    string = fix_string_param(string, pattern, "pattern");
++    if (!string)
++        return NULL;
++    string = state_init(&state, (PatternObject *)self, string, pos, endpos);
 +    if (!string)
 +        return NULL;
 +
 +    state.ptr = state.start;
 +
 +    TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
 +
 +    status = sre_match(&state, PatternObject_GetCode(self));
 +
 +    TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
 +    if (PyErr_Occurred())
 +        return NULL;
 +
 +    state_fini(&state);
 +
 +    return (PyObject *)pattern_new_match(self, &state, status);
 +}
 +
  static PyObject*
 -pattern_match(PatternObject* self, PyObject* args, PyObject* kw)
 +pattern_fullmatch(PatternObject* self, PyObject* args, PyObject* kw)
  {
      SRE_STATE state;
 -    int status;
 +    Py_ssize_t status;
  
-     PyObject* string;
+     PyObject *string = NULL, *string2 = NULL;
      Py_ssize_t start = 0;
      Py_ssize_t end = PY_SSIZE_T_MAX;
-     static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
-     if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:fullmatch", kwlist,
-                                      &string, &start, &end))
+     static char* kwlist[] = { "string", "pos", "endpos", "pattern", NULL };
 -    if (!PyArg_ParseTupleAndKeywords(args, kw, "|Onn$O:match", kwlist,
++    if (!PyArg_ParseTupleAndKeywords(args, kw, "|Onn$O:fullmatch", kwlist,
+                                      &string, &start, &end, &string2))
+         return NULL;
+     string = fix_string_param(string, string2, "pattern");
+     if (!string)
          return NULL;
  
      string = state_init(&state, self, string, start, end);
@@@ -635,14 -1945,18 +629,18 @@@ static PyObject
  pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
  {
      SRE_STATE state;
 -    int status;
 +    Py_ssize_t status;
  
-     PyObject* string;
+     PyObject *string = NULL, *string2 = NULL;
      Py_ssize_t start = 0;
      Py_ssize_t end = PY_SSIZE_T_MAX;
-     static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
-     if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:search", kwlist,
-                                      &string, &start, &end))
+     static char* kwlist[] = { "string", "pos", "endpos", "pattern", NULL };
+     if (!PyArg_ParseTupleAndKeywords(args, kw, "|Onn$O:search", kwlist,
+                                      &string, &start, &end, &string2))
+         return NULL;
+     string = fix_string_param(string, string2, "pattern");
+     if (!string)
          return NULL;
  
      string = state_init(&state, self, string, start, end);
@@@ -715,15 -2081,19 +713,19 @@@ pattern_findall(PatternObject* self, Py
  {
      SRE_STATE state;
      PyObject* list;
 -    int status;
 +    Py_ssize_t status;
      Py_ssize_t i, b, e;
  
-     PyObject* string;
+     PyObject *string = NULL, *string2 = NULL;
      Py_ssize_t start = 0;
      Py_ssize_t end = PY_SSIZE_T_MAX;
-     static char* kwlist[] = { "source", "pos", "endpos", NULL };
-     if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:findall", kwlist,
-                                      &string, &start, &end))
+     static char* kwlist[] = { "string", "pos", "endpos", "source", NULL };
+     if (!PyArg_ParseTupleAndKeywords(args, kw, "|Onn$O:findall", kwlist,
+                                      &string, &start, &end, &string2))
+         return NULL;
+     string = fix_string_param(string, string2, "source");
+     if (!string)
          return NULL;
  
      string = state_init(&state, self, string, start, end);
@@@ -1212,90 -2581,10 +1218,94 @@@ pattern_deepcopy(PatternObject* self, P
  #endif
  }
  
 +static PyObject *
 +pattern_repr(PatternObject *obj)
 +{
 +    static const struct {
 +        const char *name;
 +        int value;
 +    } flag_names[] = {
 +        {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
 +        {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
 +        {"re.LOCALE", SRE_FLAG_LOCALE},
 +        {"re.MULTILINE", SRE_FLAG_MULTILINE},
 +        {"re.DOTALL", SRE_FLAG_DOTALL},
 +        {"re.UNICODE", SRE_FLAG_UNICODE},
 +        {"re.VERBOSE", SRE_FLAG_VERBOSE},
 +        {"re.DEBUG", SRE_FLAG_DEBUG},
 +        {"re.ASCII", SRE_FLAG_ASCII},
 +    };
 +    PyObject *result = NULL;
 +    PyObject *flag_items;
 +    int i;
 +    int flags = obj->flags;
 +
 +    /* Omit re.UNICODE for valid string patterns. */
 +    if (obj->isbytes == 0 &&
 +        (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
 +         SRE_FLAG_UNICODE)
 +        flags &= ~SRE_FLAG_UNICODE;
 +
 +    flag_items = PyList_New(0);
 +    if (!flag_items)
 +        return NULL;
 +
 +    for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
 +        if (flags & flag_names[i].value) {
 +            PyObject *item = PyUnicode_FromString(flag_names[i].name);
 +            if (!item)
 +                goto done;
 +
 +            if (PyList_Append(flag_items, item) < 0) {
 +                Py_DECREF(item);
 +                goto done;
 +            }
 +            Py_DECREF(item);
 +            flags &= ~flag_names[i].value;
 +        }
 +    }
 +    if (flags) {
 +        PyObject *item = PyUnicode_FromFormat("0x%x", flags);
 +        if (!item)
 +            goto done;
 +
 +        if (PyList_Append(flag_items, item) < 0) {
 +            Py_DECREF(item);
 +            goto done;
 +        }
 +        Py_DECREF(item);
 +    }
 +
 +    if (PyList_Size(flag_items) > 0) {
 +        PyObject *flags_result;
 +        PyObject *sep = PyUnicode_FromString("|");
 +        if (!sep)
 +            goto done;
 +        flags_result = PyUnicode_Join(sep, flag_items);
 +        Py_DECREF(sep);
 +        if (!flags_result)
 +            goto done;
 +        result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
 +                                      obj->pattern, flags_result);
 +        Py_DECREF(flags_result);
 +    }
 +    else {
 +        result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
 +    }
 +
 +done:
 +    Py_DECREF(flag_items);
 +    return result;
 +}
 +
+ PyDoc_STRVAR(pattern_match_doc,
+ "match(string[, pos[, endpos]]) -> match object or None.\n\
+     Matches zero or more characters at the beginning of the string");
 +PyDoc_STRVAR(pattern_fullmatch_doc,
 +"fullmatch(string[, pos[, endpos]]) -> match object or None.\n\
 +    Matches against all of the string");
 +
  PyDoc_STRVAR(pattern_search_doc,
  "search(string[, pos[, endpos]]) -> match object or None.\n\
      Scan through string looking for a match, and return a corresponding\n\
@@@ -1329,9 -2618,8 +1339,10 @@@ PyDoc_STRVAR(pattern_subn_doc
  PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
  
  static PyMethodDef pattern_methods[] = {
-     PATTERN_MATCH_METHODDEF
+     {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS,
+         pattern_match_doc},
 +    {"fullmatch", (PyCFunction) pattern_fullmatch, METH_VARARGS|METH_KEYWORDS,
 +        pattern_fullmatch_doc},
      {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS,
          pattern_search_doc},
      {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS,