bpo-30397: Add re.Pattern and re.Match. (#1646)

author Serhiy Storchaka <storchaka@gmail.com>

Wed, 4 Oct 2017 17:09:49 +0000 (20:09 +0300)

committer GitHub <noreply@github.com>

Wed, 4 Oct 2017 17:09:49 +0000 (20:09 +0300)
author Serhiy Storchaka <storchaka@gmail.com>
Wed, 4 Oct 2017 17:09:49 +0000 (20:09 +0300)
committer GitHub <noreply@github.com>
Wed, 4 Oct 2017 17:09:49 +0000 (20:09 +0300)
diff --git a/Doc/howto/regex.rst b/Doc/howto/regex.rst

index d9b7c9091d467a18134e9945175dd0c50ec209ad..3121a9ffb82a908d44d752c89b99ec8a8f6adf2b 100644 (file)
--- a/Doc/howto/regex.rst
+++ b/Doc/howto/regex.rst
@@ -402,7 +402,7 @@ should store the result in a variable for later use. ::
  
     >>> m = p.match('tempo')
     >>> m  #doctest: +ELLIPSIS
-   <_sre.SRE_Match object; span=(0, 5), match='tempo'>
+   <re.Match object; span=(0, 5), match='tempo'>
  
  Now you can query the :ref:`match object <match-objects>` for information
  about the matching string.  :ref:`match object <match-objects>` instances
@@ -441,7 +441,7 @@ case. ::
     >>> print(p.match('::: message'))
     None
     >>> m = p.search('::: message'); print(m)  #doctest: +ELLIPSIS
-   <_sre.SRE_Match object; span=(4, 11), match='message'>
+   <re.Match object; span=(4, 11), match='message'>
     >>> m.group()
     'message'
     >>> m.span()
@@ -493,7 +493,7 @@ the RE string added as the first argument, and still return either ``None`` or a
     >>> print(re.match(r'From\s+', 'Fromage amk'))
     None
     >>> re.match(r'From\s+', 'From amk Thu May 14 19:12:10 1998')  #doctest: +ELLIPSIS
-   <_sre.SRE_Match object; span=(0, 5), match='From '>
+   <re.Match object; span=(0, 5), match='From '>
  
  Under the hood, these functions simply create a pattern object for you
  and call the appropriate method on it.  They also store the compiled
@@ -685,7 +685,7 @@ given location, they can obviously be matched an infinite number of times.
     line, the RE to use is ``^From``. ::
  
        >>> print(re.search('^From', 'From Here to Eternity'))  #doctest: +ELLIPSIS
-      <_sre.SRE_Match object; span=(0, 4), match='From'>
+      <re.Match object; span=(0, 4), match='From'>
        >>> print(re.search('^From', 'Reciting From Memory'))
        None
  
@@ -697,11 +697,11 @@ given location, they can obviously be matched an infinite number of times.
     or any location followed by a newline character.     ::
  
        >>> print(re.search('}$', '{block}'))  #doctest: +ELLIPSIS
-      <_sre.SRE_Match object; span=(6, 7), match='}'>
+      <re.Match object; span=(6, 7), match='}'>
        >>> print(re.search('}$', '{block} '))
        None
        >>> print(re.search('}$', '{block}\n'))  #doctest: +ELLIPSIS
-      <_sre.SRE_Match object; span=(6, 7), match='}'>
+      <re.Match object; span=(6, 7), match='}'>
  
     To match a literal ``'$'``, use ``\$`` or enclose it inside a character class,
     as in  ``[$]``.
@@ -726,7 +726,7 @@ given location, they can obviously be matched an infinite number of times.
  
        >>> p = re.compile(r'\bclass\b')
        >>> print(p.search('no class at all'))  #doctest: +ELLIPSIS
-      <_sre.SRE_Match object; span=(3, 8), match='class'>
+      <re.Match object; span=(3, 8), match='class'>
        >>> print(p.search('the declassified algorithm'))
        None
        >>> print(p.search('one subclass is'))
@@ -744,7 +744,7 @@ given location, they can obviously be matched an infinite number of times.
        >>> print(p.search('no class at all'))
        None
        >>> print(p.search('\b' + 'class' + '\b'))  #doctest: +ELLIPSIS
-      <_sre.SRE_Match object; span=(0, 7), match='\x08class\x08'>
+      <re.Match object; span=(0, 7), match='\x08class\x08'>
  
     Second, inside a character class, where there's no use for this assertion,
     ``\b`` represents the backspace character, for compatibility with Python's
diff --git a/Doc/library/fnmatch.rst b/Doc/library/fnmatch.rst

index 634c26e95702e956d7c078afde129fcc2c93cfab..abf952355051c562f99b56709bf18954e9963e57 100644 (file)
--- a/Doc/library/fnmatch.rst
+++ b/Doc/library/fnmatch.rst
@@ -86,7 +86,7 @@ patterns.
        '(?s:.*\\.txt)\\Z'
        >>> reobj = re.compile(regex)
        >>> reobj.match('foobar.txt')
-      <_sre.SRE_Match object; span=(0, 10), match='foobar.txt'>
+      <re.Match object; span=(0, 10), match='foobar.txt'>
  
  
  .. seealso::
diff --git a/Doc/library/re.rst b/Doc/library/re.rst

index 138e7d87996be83355c3946041313ebd24cac5ad..7efdd5d9d655da5261ae59afff6cf4312bece09b 100644 (file)
--- a/Doc/library/re.rst
+++ b/Doc/library/re.rst
@@ -492,7 +492,7 @@ form.
  
     Compile a regular expression pattern into a :ref:`regular expression object
     <re-objects>`, which can be used for matching using its
-   :func:`~regex.match`, :func:`~regex.search` and other methods, described
+   :meth:`~Pattern.match`, :meth:`~Pattern.search` and other methods, described
     below.
  
     The expression's behaviour can be modified by specifying a *flags* value.
@@ -747,7 +747,7 @@ form.
        >>> re.sub(r'\sAND\s', ' & ', 'Baked Beans And Spam', flags=re.IGNORECASE)
        'Baked Beans & Spam'
  
-   The pattern may be a string or an RE object.
+   The pattern may be a string or a :class:`Pattern` object.
  
     The optional argument *count* is the maximum number of pattern occurrences to be
     replaced; *count* must be a non-negative integer.  If omitted or zero, all
@@ -861,7 +861,7 @@ Regular Expression Objects
  Compiled regular expression objects support the following methods and
  attributes:
  
-.. method:: regex.search(string[, pos[, endpos]])
+.. method:: Pattern.search(string[, pos[, endpos]])
  
     Scan through *string* looking for the first location where this regular
     expression produces a match, and return a corresponding :ref:`match object
@@ -884,11 +884,11 @@ attributes:
  
     >>> pattern = re.compile("d")
     >>> pattern.search("dog")     # Match at index 0
-   <_sre.SRE_Match object; span=(0, 1), match='d'>
+   <re.Match object; span=(0, 1), match='d'>
     >>> pattern.search("dog", 1)  # No match; search doesn't include the "d"
  
  
-.. method:: regex.match(string[, pos[, endpos]])
+.. method:: Pattern.match(string[, pos[, endpos]])
  
     If zero or more characters at the *beginning* of *string* match this regular
     expression, return a corresponding :ref:`match object <match-objects>`.
@@ -896,86 +896,86 @@ attributes:
     different from a zero-length match.
  
     The optional *pos* and *endpos* parameters have the same meaning as for the
-   :meth:`~regex.search` method.
+   :meth:`~Pattern.search` method.
  
     >>> pattern = re.compile("o")
     >>> pattern.match("dog")      # No match as "o" is not at the start of "dog".
     >>> pattern.match("dog", 1)   # Match as "o" is the 2nd character of "dog".
-   <_sre.SRE_Match object; span=(1, 2), match='o'>
+   <re.Match object; span=(1, 2), match='o'>
  
     If you want to locate a match anywhere in *string*, use
-   :meth:`~regex.search` instead (see also :ref:`search-vs-match`).
+   :meth:`~Pattern.search` instead (see also :ref:`search-vs-match`).
  
  
-.. method:: regex.fullmatch(string[, pos[, endpos]])
+.. method:: Pattern.fullmatch(string[, pos[, endpos]])
  
     If the whole *string* matches this regular expression, return a corresponding
     :ref:`match object <match-objects>`.  Return ``None`` if the string does not
     match the pattern; note that this is different from a zero-length match.
  
     The optional *pos* and *endpos* parameters have the same meaning as for the
-   :meth:`~regex.search` method.
+   :meth:`~Pattern.search` method.
  
     >>> pattern = re.compile("o[gh]")
     >>> pattern.fullmatch("dog")      # No match as "o" is not at the start of "dog".
     >>> pattern.fullmatch("ogre")     # No match as not the full string matches.
     >>> pattern.fullmatch("doggie", 1, 3)   # Matches within given limits.
-   <_sre.SRE_Match object; span=(1, 3), match='og'>
+   <re.Match object; span=(1, 3), match='og'>
  
     .. versionadded:: 3.4
  
  
-.. method:: regex.split(string, maxsplit=0)
+.. method:: Pattern.split(string, maxsplit=0)
  
     Identical to the :func:`split` function, using the compiled pattern.
  
  
-.. method:: regex.findall(string[, pos[, endpos]])
+.. method:: Pattern.findall(string[, pos[, endpos]])
  
     Similar to the :func:`findall` function, using the compiled pattern, but
     also accepts optional *pos* and *endpos* parameters that limit the search
     region like for :meth:`match`.
  
  
-.. method:: regex.finditer(string[, pos[, endpos]])
+.. method:: Pattern.finditer(string[, pos[, endpos]])
  
     Similar to the :func:`finditer` function, using the compiled pattern, but
     also accepts optional *pos* and *endpos* parameters that limit the search
     region like for :meth:`match`.
  
  
-.. method:: regex.sub(repl, string, count=0)
+.. method:: Pattern.sub(repl, string, count=0)
  
     Identical to the :func:`sub` function, using the compiled pattern.
  
  
-.. method:: regex.subn(repl, string, count=0)
+.. method:: Pattern.subn(repl, string, count=0)
  
     Identical to the :func:`subn` function, using the compiled pattern.
  
  
-.. attribute:: regex.flags
+.. attribute:: Pattern.flags
  
     The regex matching flags.  This is a combination of the flags given to
     :func:`.compile`, any ``(?...)`` inline flags in the pattern, and implicit
     flags such as :data:`UNICODE` if the pattern is a Unicode string.
  
  
-.. attribute:: regex.groups
+.. attribute:: Pattern.groups
  
     The number of capturing groups in the pattern.
  
  
-.. attribute:: regex.groupindex
+.. attribute:: Pattern.groupindex
  
     A dictionary mapping any symbolic group names defined by ``(?P<id>)`` to group
     numbers.  The dictionary is empty if no symbolic groups were used in the
     pattern.
  
  
-.. attribute:: regex.pattern
+.. attribute:: Pattern.pattern
  
-   The pattern string from which the RE object was compiled.
+   The pattern string from which the pattern object was compiled.
  
  
  .. versionchanged:: 3.7
@@ -989,7 +989,7 @@ Match Objects
  -------------
  
  Match objects always have a boolean value of ``True``.
-Since :meth:`~regex.match` and :meth:`~regex.search` return ``None``
+Since :meth:`~Pattern.match` and :meth:`~Pattern.search` return ``None``
  when there is no match, you can test whether there was a match with a simple
  ``if`` statement::
  
@@ -1000,10 +1000,10 @@ when there is no match, you can test whether there was a match with a simple
  Match objects support the following methods and attributes:
  
  
-.. method:: match.expand(template)
+.. method:: Match.expand(template)
  
     Return the string obtained by doing backslash substitution on the template
-   string *template*, as done by the :meth:`~regex.sub` method.
+   string *template*, as done by the :meth:`~Pattern.sub` method.
     Escapes such as ``\n`` are converted to the appropriate characters,
     and numeric backreferences (``\1``, ``\2``) and named backreferences
     (``\g<1>``, ``\g<name>``) are replaced by the contents of the
@@ -1012,7 +1012,7 @@ Match objects support the following methods and attributes:
     .. versionchanged:: 3.5
        Unmatched groups are replaced with an empty string.
  
-.. method:: match.group([group1, ...])
+.. method:: Match.group([group1, ...])
  
     Returns one or more subgroups of the match.  If there is a single argument, the
     result is a single string; if there are multiple arguments, the result is a
@@ -1063,7 +1063,7 @@ Match objects support the following methods and attributes:
        'c3'
  
  
-.. method:: match.__getitem__(g)
+.. method:: Match.__getitem__(g)
  
     This is identical to ``m.group(g)``.  This allows easier access to
     an individual group from a match:
@@ -1079,7 +1079,7 @@ Match objects support the following methods and attributes:
     .. versionadded:: 3.6
  
  
-.. method:: match.groups(default=None)
+.. method:: Match.groups(default=None)
  
     Return a tuple containing all the subgroups of the match, from 1 up to however
     many groups are in the pattern.  The *default* argument is used for groups that
@@ -1102,7 +1102,7 @@ Match objects support the following methods and attributes:
        ('24', '0')
  
  
-.. method:: match.groupdict(default=None)
+.. method:: Match.groupdict(default=None)
  
     Return a dictionary containing all the *named* subgroups of the match, keyed by
     the subgroup name.  The *default* argument is used for groups that did not
@@ -1113,8 +1113,8 @@ Match objects support the following methods and attributes:
        {'first_name': 'Malcolm', 'last_name': 'Reynolds'}
  
  
-.. method:: match.start([group])
-            match.end([group])
+.. method:: Match.start([group])
+            Match.end([group])
  
     Return the indices of the start and end of the substring matched by *group*;
     *group* defaults to zero (meaning the whole matched substring). Return ``-1`` if
@@ -1137,28 +1137,28 @@ Match objects support the following methods and attributes:
        'tony@tiger.net'
  
  
-.. method:: match.span([group])
+.. method:: Match.span([group])
  
     For a match *m*, return the 2-tuple ``(m.start(group), m.end(group))``. Note
     that if *group* did not contribute to the match, this is ``(-1, -1)``.
     *group* defaults to zero, the entire match.
  
  
-.. attribute:: match.pos
+.. attribute:: Match.pos
  
-   The value of *pos* which was passed to the :meth:`~regex.search` or
-   :meth:`~regex.match` method of a :ref:`regex object <re-objects>`.  This is
+   The value of *pos* which was passed to the :meth:`~Pattern.search` or
+   :meth:`~Pattern.match` method of a :ref:`regex object <re-objects>`.  This is
     the index into the string at which the RE engine started looking for a match.
  
  
-.. attribute:: match.endpos
+.. attribute:: Match.endpos
  
-   The value of *endpos* which was passed to the :meth:`~regex.search` or
-   :meth:`~regex.match` method of a :ref:`regex object <re-objects>`.  This is
+   The value of *endpos* which was passed to the :meth:`~Pattern.search` or
+   :meth:`~Pattern.match` method of a :ref:`regex object <re-objects>`.  This is
     the index into the string beyond which the RE engine will not go.
  
  
-.. attribute:: match.lastindex
+.. attribute:: Match.lastindex
  
     The integer index of the last matched capturing group, or ``None`` if no group
     was matched at all. For example, the expressions ``(a)b``, ``((a)(b))``, and
@@ -1167,21 +1167,21 @@ Match objects support the following methods and attributes:
     string.
  
  
-.. attribute:: match.lastgroup
+.. attribute:: Match.lastgroup
  
     The name of the last matched capturing group, or ``None`` if the group didn't
     have a name, or if no group was matched at all.
  
  
-.. attribute:: match.re
+.. attribute:: Match.re
  
-   The regular expression object whose :meth:`~regex.match` or
-   :meth:`~regex.search` method produced this match instance.
+   The regular expression object whose :meth:`~Pattern.match` or
+   :meth:`~Pattern.search` method produced this match instance.
  
  
-.. attribute:: match.string
+.. attribute:: Match.string
  
-   The string passed to :meth:`~regex.match` or :meth:`~regex.search`.
+   The string passed to :meth:`~Pattern.match` or :meth:`~Pattern.search`.
  
  
  .. versionchanged:: 3.7
@@ -1234,7 +1234,7 @@ To match this with a regular expression, one could use backreferences as such:
     "<Match: '354aa', groups=('a',)>"
  
  To find out what card the pair consists of, one could use the
-:meth:`~match.group` method of the match object in the following manner:
+:meth:`~Match.group` method of the match object in the following manner:
  
  .. doctest::
  
@@ -1314,7 +1314,7 @@ For example::
  
     >>> re.match("c", "abcdef")    # No match
     >>> re.search("c", "abcdef")   # Match
-   <_sre.SRE_Match object; span=(2, 3), match='c'>
+   <re.Match object; span=(2, 3), match='c'>
  
  Regular expressions beginning with ``'^'`` can be used with :func:`search` to
  restrict the match at the beginning of the string::
@@ -1322,7 +1322,7 @@ restrict the match at the beginning of the string::
     >>> re.match("c", "abcdef")    # No match
     >>> re.search("^c", "abcdef")  # No match
     >>> re.search("^a", "abcdef")  # Match
-   <_sre.SRE_Match object; span=(0, 1), match='a'>
+   <re.Match object; span=(0, 1), match='a'>
  
  Note however that in :const:`MULTILINE` mode :func:`match` only matches at the
  beginning of the string, whereas using :func:`search` with a regular expression
@@ -1330,7 +1330,7 @@ beginning with ``'^'`` will match at the beginning of each line.
  
     >>> re.match('X', 'A\nB\nX', re.MULTILINE)  # No match
     >>> re.search('^X', 'A\nB\nX', re.MULTILINE)  # Match
-   <_sre.SRE_Match object; span=(4, 5), match='X'>
+   <re.Match object; span=(4, 5), match='X'>
  
  
  Making a Phonebook
@@ -1449,9 +1449,9 @@ another one to escape it.  For example, the two following lines of code are
  functionally identical:
  
     >>> re.match(r"\W(.)\1\W", " ff ")
-   <_sre.SRE_Match object; span=(0, 4), match=' ff '>
+   <re.Match object; span=(0, 4), match=' ff '>
     >>> re.match("\\W(.)\\1\\W", " ff ")
-   <_sre.SRE_Match object; span=(0, 4), match=' ff '>
+   <re.Match object; span=(0, 4), match=' ff '>
  
  When one wants to match a literal backslash, it must be escaped in the regular
  expression.  With raw string notation, this means ``r"\\"``.  Without raw string
@@ -1459,9 +1459,9 @@ notation, one must use ``"\\\\"``, making the following lines of code
  functionally identical:
  
     >>> re.match(r"\\", r"\\")
-   <_sre.SRE_Match object; span=(0, 1), match='\\'>
+   <re.Match object; span=(0, 1), match='\\'>
     >>> re.match("\\\\", r"\\")
-   <_sre.SRE_Match object; span=(0, 1), match='\\'>
+   <re.Match object; span=(0, 1), match='\\'>
  
  
  Writing a Tokenizer
diff --git a/Lib/idlelib/idle_test/test_calltips.py b/Lib/idlelib/idle_test/test_calltips.py

index fa92ece78ee6b9315a14c90ab7a97bfd169e8c12..a58229d36ede70b29ab6199f5bdb0a329f1e8f40 100644 (file)
--- a/Lib/idlelib/idle_test/test_calltips.py
+++ b/Lib/idlelib/idle_test/test_calltips.py
@@ -74,7 +74,7 @@ class Get_signatureTest(unittest.TestCase):
  non-overlapping occurrences of the pattern in string by the
  replacement repl.  repl can be either a string or a callable;
  if a string, backslash escapes in it are processed.  If it is
-a callable, it's passed the match object and must return''')
+a callable, it's passed the Match object and must return''')
          gtest(p.sub, '''(repl, string, count=0)\nReturn the string obtained by replacing the leftmost non-overlapping occurrences o...''')
  
      def test_signature_wrap(self):
diff --git a/Lib/re.py b/Lib/re.py

index c194dba844a923d6ebd2e9257c0461dd1cc2e62b..d772979f91e1af4f9a636c75f56a789b473376bb 100644 (file)
--- a/Lib/re.py
+++ b/Lib/re.py
@@ -92,8 +92,8 @@ This module exports the following functions:
      subn      Same as sub, but also return the number of substitutions made.
      split     Split a string by the occurrences of a pattern.
      findall   Find all occurrences of a pattern in a string.
-    finditer  Return an iterator yielding a match object for each match.
-    compile   Compile a pattern into a RegexObject.
+    finditer  Return an iterator yielding a Match object for each match.
+    compile   Compile a pattern into a Pattern object.
      purge     Clear the regular expression cache.
      escape    Backslash all non-alphanumerics in a string.
  
@@ -139,7 +139,7 @@ except ImportError:
  __all__ = [
      "match", "fullmatch", "search", "sub", "subn", "split",
      "findall", "finditer", "compile", "purge", "template", "escape",
-    "error", "A", "I", "L", "M", "S", "X", "U",
+    "error", "Pattern", "Match", "A", "I", "L", "M", "S", "X", "U",
      "ASCII", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE",
      "UNICODE",
  ]
@@ -175,17 +175,17 @@ error = sre_compile.error
  
  def match(pattern, string, flags=0):
      """Try to apply the pattern at the start of the string, returning
-    a match object, or None if no match was found."""
+    a Match object, or None if no match was found."""
      return _compile(pattern, flags).match(string)
  
  def fullmatch(pattern, string, flags=0):
      """Try to apply the pattern to all of the string, returning
-    a match object, or None if no match was found."""
+    a Match object, or None if no match was found."""
      return _compile(pattern, flags).fullmatch(string)
  
  def search(pattern, string, flags=0):
      """Scan through string looking for a match to the pattern, returning
-    a match object, or None if no match was found."""
+    a Match object, or None if no match was found."""
      return _compile(pattern, flags).search(string)
  
  def sub(pattern, repl, string, count=0, flags=0):
@@ -193,7 +193,7 @@ def sub(pattern, repl, string, count=0, flags=0):
      non-overlapping occurrences of the pattern in string by the
      replacement repl.  repl can be either a string or a callable;
      if a string, backslash escapes in it are processed.  If it is
-    a callable, it's passed the match object and must return
+    a callable, it's passed the Match object and must return
      a replacement string to be used."""
      return _compile(pattern, flags).sub(repl, string, count)
  
@@ -204,7 +204,7 @@ def subn(pattern, repl, string, count=0, flags=0):
      string by the replacement repl.  number is the number of
      substitutions that were made. repl can be either a string or a
      callable; if a string, backslash escapes in it are processed.
-    If it is a callable, it's passed the match object and must
+    If it is a callable, it's passed the Match object and must
      return a replacement string to be used."""
      return _compile(pattern, flags).subn(repl, string, count)
  
@@ -230,13 +230,13 @@ def findall(pattern, string, flags=0):
  
  def finditer(pattern, string, flags=0):
      """Return an iterator over all non-overlapping matches in the
-    string.  For each match, the iterator returns a match object.
+    string.  For each match, the iterator returns a Match object.
  
      Empty matches are included in the result."""
      return _compile(pattern, flags).finditer(string)
  
  def compile(pattern, flags=0):
-    "Compile a regular expression pattern, returning a pattern object."
+    "Compile a regular expression pattern, returning a Pattern object."
      return _compile(pattern, flags)
  
  def purge():
@@ -245,7 +245,7 @@ def purge():
      _compile_repl.cache_clear()
  
  def template(pattern, flags=0):
-    "Compile a template pattern, returning a pattern object"
+    "Compile a template pattern, returning a Pattern object"
      return _compile(pattern, flags|T)
  
  # SPECIAL_CHARS
@@ -264,13 +264,14 @@ def escape(pattern):
          pattern = str(pattern, 'latin1')
          return pattern.translate(_special_chars_map).encode('latin1')
  
+Pattern = type(sre_compile.compile('', 0))
+Match = type(sre_compile.compile('', 0).match(''))
+
  # --------------------------------------------------------------------
  # internals
  
  _cache = OrderedDict()
  
-_pattern_type = type(sre_compile.compile("", 0))
-
  _MAXCACHE = 512
  def _compile(pattern, flags):
      # internal: compile pattern
@@ -278,7 +279,7 @@ def _compile(pattern, flags):
          return _cache[type(pattern), pattern, flags]
      except KeyError:
          pass
-    if isinstance(pattern, _pattern_type):
+    if isinstance(pattern, Pattern):
          if flags:
              raise ValueError(
                  "cannot process flags argument with a compiled pattern")
@@ -301,12 +302,12 @@ def _compile_repl(repl, pattern):
      return sre_parse.parse_template(repl, pattern)
  
  def _expand(pattern, match, template):
-    # internal: match.expand implementation hook
+    # internal: Match.expand implementation hook
      template = sre_parse.parse_template(template, pattern)
      return sre_parse.expand_template(template, match)
  
  def _subx(pattern, template):
-    # internal: pattern.sub/subn implementation helper
+    # internal: Pattern.sub/subn implementation helper
      template = _compile_repl(template, pattern)
      if not template[0] and len(template[1]) == 1:
          # literal replacement
@@ -322,7 +323,7 @@ import copyreg
  def _pickle(p):
      return _compile, (p.pattern, p.flags)
  
-copyreg.pickle(_pattern_type, _pickle, _compile)
+copyreg.pickle(Pattern, _pickle, _compile)
  
  # --------------------------------------------------------------------
  # experimental stuff (see python-dev discussions for details)
diff --git a/Lib/sre_constants.py b/Lib/sre_constants.py

index 0261e9e956cbce94ed705f16bf117f404daa0a35..1daa7bd00f4347d25252a2dac3414aa13ed60f25 100644 (file)
--- a/Lib/sre_constants.py
+++ b/Lib/sre_constants.py
@@ -32,6 +32,8 @@ class error(Exception):
          colno: The column corresponding to pos (may be None)
      """
  
+    __module__ = 're'
+
      def __init__(self, msg, pattern=None, pos=None):
          self.msg = msg
          self.pattern = pattern
diff --git a/Lib/telnetlib.py b/Lib/telnetlib.py

index b0863b1cbd60344254992ba11aad25eb973e63b1..b9d45b48e79fb6a3ec9139aa28119816efa073f5 100644 (file)
--- a/Lib/telnetlib.py
+++ b/Lib/telnetlib.py
@@ -585,12 +585,12 @@ class Telnet:
          """Read until one from a list of a regular expressions matches.
  
          The first argument is a list of regular expressions, either
-        compiled (re.RegexObject instances) or uncompiled (strings).
+        compiled (re.Pattern instances) or uncompiled (strings).
          The optional second argument is a timeout, in seconds; default
          is no timeout.
  
          Return a tuple of three items: the index in the list of the
-        first regular expression that matches; the match object
+        first regular expression that matches; the re.Match object
          returned; and the text read up till and including the match.
  
          If EOF is read and no text was read, raise EOFError.
diff --git a/Lib/test/test_optparse.py b/Lib/test/test_optparse.py

index 91a0319a7352b04f5ddc56ad17473be0081e7dfa..437fdd2be8d85ac46aa12d328bfb08fabc005a86 100644 (file)
--- a/Lib/test/test_optparse.py
+++ b/Lib/test/test_optparse.py
@@ -24,8 +24,6 @@ from optparse import make_option, Option, \
  from optparse import _match_abbrev
  from optparse import _parse_num
  
-retype = type(re.compile(''))
-
  class InterceptedError(Exception):
      def __init__(self,
                   error_message=None,
@@ -107,7 +105,7 @@ Args were %(args)s.""" % locals ())
              func(*args, **kwargs)
          except expected_exception as err:
              actual_message = str(err)
-            if isinstance(expected_message, retype):
+            if isinstance(expected_message, re.Pattern):
                  self.assertTrue(expected_message.search(actual_message),
                               """\
  expected exception message pattern:
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py

index e9c07a04f570140e8e8b033b982d4cbb224ae82c..9cb426a04dc2940bcf57d6a2829139e14aa0fff5 100644 (file)
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -1596,9 +1596,9 @@ class ReTests(unittest.TestCase):
      def test_compile(self):
          # Test return value when given string and pattern as parameter
          pattern = re.compile('random pattern')
-        self.assertIsInstance(pattern, re._pattern_type)
+        self.assertIsInstance(pattern, re.Pattern)
          same_pattern = re.compile(pattern)
-        self.assertIsInstance(same_pattern, re._pattern_type)
+        self.assertIsInstance(same_pattern, re.Pattern)
          self.assertIs(same_pattern, pattern)
          # Test behaviour when not given a string or pattern as parameter
          self.assertRaises(TypeError, re.compile, 0)
diff --git a/Lib/unittest/case.py b/Lib/unittest/case.py

index f19afef48d73cd6d48c2221c87777f3fc8671b26..c48a63c575f28bf1cbdd289f443f8e517aa82d13 100644 (file)
--- a/Lib/unittest/case.py
+++ b/Lib/unittest/case.py
@@ -1273,7 +1273,7 @@ class TestCase(object):
  
          Args:
              expected_exception: Exception class expected to be raised.
-            expected_regex: Regex (re pattern object or string) expected
+            expected_regex: Regex (re.Pattern object or string) expected
                      to be found in error message.
              args: Function to be called and extra positional args.
              kwargs: Extra kwargs.
@@ -1292,7 +1292,7 @@ class TestCase(object):
  
          Args:
              expected_warning: Warning class expected to be triggered.
-            expected_regex: Regex (re pattern object or string) expected
+            expected_regex: Regex (re.Pattern object or string) expected
                      to be found in error message.
              args: Function to be called and extra positional args.
              kwargs: Extra kwargs.
diff --git a/Misc/NEWS.d/next/Library/2017-10-03-22-45-50.bpo-30397.e4F7Kr.rst b/Misc/NEWS.d/next/Library/2017-10-03-22-45-50.bpo-30397.e4F7Kr.rst

new file mode 100644 (file)

index 0000000..2f1f762
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2017-10-03-22-45-50.bpo-30397.e4F7Kr.rst
@@ -0,0 +1,3 @@
+The types of compiled regular expression objects and match objects are now
+exposed as `re.Pattern` and `re.Match`.  This adds information in pydoc
+output for the re module.
diff --git a/Modules/_sre.c b/Modules/_sre.c

index 6873f1db438d4113f2dc08536b1632d7d0d91864..c42ab2668fd8bdbcb9a1cf63bcb492cdce41298c 100644 (file)
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -630,13 +630,13 @@ _sre.SRE_Pattern.fullmatch
      pos: Py_ssize_t = 0
      endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
  
-Matches against all of the string
+Matches against all of the string.
  [clinic start generated code]*/
  
  static PyObject *
  _sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyObject *string,
                                  Py_ssize_t pos, Py_ssize_t endpos)
-/*[clinic end generated code: output=5833c47782a35f4a input=a6f640614aaefceb]*/
+/*[clinic end generated code: output=5833c47782a35f4a input=d9fb03a7625b5828]*/
  {
      SRE_STATE state;
      Py_ssize_t status;
@@ -1341,7 +1341,7 @@ done:
      return result;
  }
  
-PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
+PyDoc_STRVAR(pattern_doc, "Compiled regular expression object.");
  
  /* PatternObject's 'groupindex' method. */
  static PyObject *
@@ -2221,12 +2221,12 @@ _sre.SRE_Match.span
      group: object(c_default="NULL") = 0
      /
  
-For MatchObject m, return the 2-tuple (m.start(group), m.end(group)).
+For match object m, return the 2-tuple (m.start(group), m.end(group)).
  [clinic start generated code]*/
  
  static PyObject *
  _sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)
-/*[clinic end generated code: output=f02ae40594d14fe6 input=49092b6008d176d3]*/
+/*[clinic end generated code: output=f02ae40594d14fe6 input=8fa6014e982d71d4]*/
  {
      Py_ssize_t index = match_getindex(self, group);
  
@@ -2625,15 +2625,18 @@ static PyGetSetDef pattern_getset[] = {
  
  #define PAT_OFF(x) offsetof(PatternObject, x)
  static PyMemberDef pattern_members[] = {
-    {"pattern",    T_OBJECT,    PAT_OFF(pattern),       READONLY},
-    {"flags",      T_INT,       PAT_OFF(flags),         READONLY},
-    {"groups",     T_PYSSIZET,  PAT_OFF(groups),        READONLY},
+    {"pattern",    T_OBJECT,    PAT_OFF(pattern),       READONLY,
+     "The pattern string from which the RE object was compiled."},
+    {"flags",      T_INT,       PAT_OFF(flags),         READONLY,
+     "The regex matching flags."},
+    {"groups",     T_PYSSIZET,  PAT_OFF(groups),        READONLY,
+     "The number of capturing groups in the pattern."},
      {NULL}  /* Sentinel */
  };
  
  static PyTypeObject Pattern_Type = {
      PyVarObject_HEAD_INIT(NULL, 0)
-    "_" SRE_MODULE ".SRE_Pattern",
+    "re.Pattern",
      sizeof(PatternObject), sizeof(SRE_CODE),
      (destructor)pattern_dealloc,        /* tp_dealloc */
      0,                                  /* tp_print */
@@ -2685,18 +2688,24 @@ static PyMethodDef match_methods[] = {
  };
  
  static PyGetSetDef match_getset[] = {
-    {"lastindex", (getter)match_lastindex_get, (setter)NULL},
-    {"lastgroup", (getter)match_lastgroup_get, (setter)NULL},
+    {"lastindex", (getter)match_lastindex_get, (setter)NULL,
+     "The integer index of the last matched capturing group."},
+    {"lastgroup", (getter)match_lastgroup_get, (setter)NULL,
+     "The name of the last matched capturing group."},
      {"regs",      (getter)match_regs_get,      (setter)NULL},
      {NULL}
  };
  
  #define MATCH_OFF(x) offsetof(MatchObject, x)
  static PyMemberDef match_members[] = {
-    {"string",  T_OBJECT,   MATCH_OFF(string),  READONLY},
-    {"re",      T_OBJECT,   MATCH_OFF(pattern), READONLY},
-    {"pos",     T_PYSSIZET, MATCH_OFF(pos),     READONLY},
-    {"endpos",  T_PYSSIZET, MATCH_OFF(endpos),  READONLY},
+    {"string",  T_OBJECT,   MATCH_OFF(string),  READONLY,
+     "The string passed to match() or search()."},
+    {"re",      T_OBJECT,   MATCH_OFF(pattern), READONLY,
+     "The regular expression object."},
+    {"pos",     T_PYSSIZET, MATCH_OFF(pos),     READONLY,
+     "The index into the string at which the RE engine started looking for a match."},
+    {"endpos",  T_PYSSIZET, MATCH_OFF(endpos),  READONLY,
+     "The index into the string beyond which the RE engine will not go."},
      {NULL}
  };
  
@@ -2705,7 +2714,7 @@ static PyMemberDef match_members[] = {
  
  static PyTypeObject Match_Type = {
      PyVarObject_HEAD_INIT(NULL,0)
-    "_" SRE_MODULE ".SRE_Match",
+    "re.Match",
      sizeof(MatchObject), sizeof(Py_ssize_t),
      (destructor)match_dealloc,  /* tp_dealloc */
      0,                          /* tp_print */
diff --git a/Modules/clinic/_sre.c.h b/Modules/clinic/_sre.c.h

index 8ed288e96a3d398a83096ddd66395456a2df5831..6eea0c8b252a853d37758516e74875e6565f7d9d 100644 (file)
--- a/Modules/clinic/_sre.c.h
+++ b/Modules/clinic/_sre.c.h
@@ -190,7 +190,7 @@ PyDoc_STRVAR(_sre_SRE_Pattern_fullmatch__doc__,
  "fullmatch($self, /, string, pos=0, endpos=sys.maxsize)\n"
  "--\n"
  "\n"
-"Matches against all of the string");
+"Matches against all of the string.");
  
  #define _SRE_SRE_PATTERN_FULLMATCH_METHODDEF    \
      {"fullmatch", (PyCFunction)_sre_SRE_Pattern_fullmatch, METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Pattern_fullmatch__doc__},
@@ -682,7 +682,7 @@ PyDoc_STRVAR(_sre_SRE_Match_span__doc__,
  "span($self, group=0, /)\n"
  "--\n"
  "\n"
-"For MatchObject m, return the 2-tuple (m.start(group), m.end(group)).");
+"For match object m, return the 2-tuple (m.start(group), m.end(group)).");
  
  #define _SRE_SRE_MATCH_SPAN_METHODDEF    \
      {"span", (PyCFunction)_sre_SRE_Match_span, METH_FASTCALL, _sre_SRE_Match_span__doc__},
@@ -765,4 +765,4 @@ _sre_SRE_Scanner_search(ScannerObject *self, PyObject *Py_UNUSED(ignored))
  {
      return _sre_SRE_Scanner_search_impl(self);
  }
-/*[clinic end generated code: output=6e3fb17fef1be436 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=1e6a1be31302df09 input=a9049054013a1b77]*/
author	Serhiy Storchaka <storchaka@gmail.com>
	Wed, 4 Oct 2017 17:09:49 +0000 (20:09 +0300)
committer	GitHub <noreply@github.com>
	Wed, 4 Oct 2017 17:09:49 +0000 (20:09 +0300)
Doc/howto/regex.rst		patch \| blob \| history
Doc/library/fnmatch.rst		patch \| blob \| history
Doc/library/re.rst		patch \| blob \| history
Lib/idlelib/idle_test/test_calltips.py		patch \| blob \| history
Lib/re.py		patch \| blob \| history
Lib/sre_constants.py		patch \| blob \| history
Lib/telnetlib.py		patch \| blob \| history
Lib/test/test_optparse.py		patch \| blob \| history
Lib/test/test_re.py		patch \| blob \| history
Lib/unittest/case.py		patch \| blob \| history
Misc/NEWS.d/next/Library/2017-10-03-22-45-50.bpo-30397.e4F7Kr.rst	[new file with mode: 0644]	patch \| blob
Modules/_sre.c		patch \| blob \| history
Modules/clinic/_sre.c.h		patch \| blob \| history