]> granicus.if.org Git - python/commitdiff
Implement rich comparison for _sre.SRE_Pattern
authorVictor Stinner <victor.stinner@gmail.com>
Mon, 21 Nov 2016 15:35:08 +0000 (16:35 +0100)
committerVictor Stinner <victor.stinner@gmail.com>
Mon, 21 Nov 2016 15:35:08 +0000 (16:35 +0100)
Issue #28727: Regular expression patterns, _sre.SRE_Pattern objects created by
re.compile(), become comparable (only x==y and x!=y operators). This change
should fix the issue #18383: don't duplicate warning filters when the warnings
module is reloaded (something usually only done in unit tests).

Lib/test/test_re.py
Misc/NEWS
Modules/_sre.c

index aac3a2cbab48c752d70a5cb3cb1849d8f9ec4305..4fcd2d463dcc522a79b109683609e42c4b1b1a8f 100644 (file)
@@ -3,12 +3,13 @@ from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G, \
 import io
 import locale
 import re
-from re import Scanner
 import sre_compile
-import sys
 import string
+import sys
 import traceback
 import unittest
+import warnings
+from re import Scanner
 from weakref import proxy
 
 # Misc tests from Tim Peters' re.doc
@@ -1777,6 +1778,48 @@ SUBPATTERN None 0 0
         self.assertIn('ASCII', str(re.A))
         self.assertIn('DOTALL', str(re.S))
 
+    def test_pattern_compare(self):
+        # Check == / != and hash() on compiled str patterns: same pattern
+        # string and flags compare equal with equal hashes; a different
+        # pattern string or different flags compare unequal; ordering
+        # comparisons raise TypeError.
+        pattern1 = re.compile('abc', re.IGNORECASE)
+
+        # equal
+        # (re.purge() clears the regular expression cache before each
+        # re.compile() call below)
+        re.purge()
+        pattern2 = re.compile('abc', re.IGNORECASE)
+        self.assertEqual(hash(pattern2), hash(pattern1))
+        self.assertEqual(pattern2, pattern1)
+
+        # not equal: different pattern
+        re.purge()
+        pattern3 = re.compile('XYZ', re.IGNORECASE)
+        # Don't test hash(pattern3) != hash(pattern1) because there is no
+        # guarantee that the hash values are different
+        self.assertNotEqual(pattern3, pattern1)
+
+        # not equal: different flag (flags=0)
+        re.purge()
+        pattern4 = re.compile('abc')
+        self.assertNotEqual(pattern4, pattern1)
+
+        # only == and != comparison operators are supported
+        with self.assertRaises(TypeError):
+            pattern1 < pattern2
+
+    def test_pattern_compare_bytes(self):
+        # Check == / != and hash() on compiled bytes patterns, and that a
+        # str pattern never compares equal to a bytes pattern.
+        pattern1 = re.compile(b'abc')
+
+        # equal: test bytes patterns
+        re.purge()
+        pattern2 = re.compile(b'abc')
+        self.assertEqual(hash(pattern2), hash(pattern1))
+        self.assertEqual(pattern2, pattern1)
+
+        # not equal: patterns of different types (str vs bytes);
+        # the comparison must not raise a BytesWarning, so BytesWarning is
+        # turned into an error for the duration of the check
+        re.purge()
+        pattern3 = re.compile('abc')
+        with warnings.catch_warnings():
+            warnings.simplefilter('error', BytesWarning)
+            self.assertNotEqual(pattern3, pattern1)
+
 
 class PatternReprTests(unittest.TestCase):
     def check(self, pattern, expected):
index 7ba6b427700486f5f7cbcdeeff33893440bd9e6d..ab846a6206f70944a4f3f8d75c5f1cfd6cab294e 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -42,6 +42,11 @@ Core and Builtins
 Library
 -------
 
+- Issue #28727: Regular expression patterns, _sre.SRE_Pattern objects created
+  by re.compile(), become comparable (only x==y and x!=y operators). This
+  change should fix the issue #18383: don't duplicate warning filters when the
+  warnings module is reloaded (something usually only done in unit tests).
+
 - Issue #20572: The subprocess.Popen.wait method's undocumented
   endtime parameter now raises a DeprecationWarning.
 
@@ -77,7 +82,7 @@ Library
 
 - Issue #28703: Fix asyncio.iscoroutinefunction to handle Mock objects.
 
-- Issue #28704: Fix create_unix_server to support Path-like objects 
+- Issue #28704: Fix create_unix_server to support Path-like objects
   (PEP 519).
 
 - Issue #28720: Add collections.abc.AsyncGenerator.
index 69c7bc0de6962e72c062fed379e48dc625e13d33..c1e9fa6e6bd4fc9088957df838d54271986c1930 100644 (file)
@@ -1506,14 +1506,12 @@ _sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
 
     self->groups = groups;
 
-    Py_XINCREF(groupindex);
+    Py_INCREF(groupindex);
     self->groupindex = groupindex;
 
-    Py_XINCREF(indexgroup);
+    Py_INCREF(indexgroup);
     self->indexgroup = indexgroup;
 
-    self->weakreflist = NULL;
-
     if (!_validate(self)) {
         Py_DECREF(self);
         return NULL;
@@ -2649,6 +2647,69 @@ pattern_scanner(PatternObject *self, PyObject *string, Py_ssize_t pos, Py_ssize_
     return (PyObject*) scanner;
 }
 
+/* tp_hash slot of Pattern_Type.
+
+   Combines, with XOR, the hash of the pattern object, a hash of the raw
+   bytes of the compiled code array, and the flags, isbytes and codesize
+   fields. Returns -1 if hashing self->pattern fails. */
+static Py_hash_t
+pattern_hash(PatternObject *self)
+{
+    Py_hash_t hash, hash2;
+
+    hash = PyObject_Hash(self->pattern);
+    if (hash == -1) {
+        /* hashing the pattern object failed: propagate the error value */
+        return -1;
+    }
+
+    /* hash the whole code array: codesize items of sizeof(code[0]) bytes */
+    hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize);
+    hash ^= hash2;
+
+    hash ^= self->flags;
+    hash ^= self->isbytes;
+    hash ^= self->codesize;
+
+    /* -1 is the error return value (see above), so never return it as a
+       successfully computed hash */
+    if (hash == -1) {
+        hash = -2;
+    }
+    return hash;
+}
+
+/* tp_richcompare slot of Pattern_Type.
+
+   Only Py_EQ and Py_NE are supported: any other operator, or an operand
+   whose type is not exactly Pattern_Type, yields NotImplemented.
+   Two patterns are equal when their flags, isbytes, codesize, compiled
+   code and pattern object are all equal.
+
+   Returns a new reference to a bool, or NULL if comparing the two pattern
+   objects fails. */
+static PyObject*
+pattern_richcompare(PyObject *lefto, PyObject *righto, int op)
+{
+    PatternObject *left, *right;
+    int cmp;
+
+    if (op != Py_EQ && op != Py_NE) {
+        Py_RETURN_NOTIMPLEMENTED;
+    }
+
+    if (Py_TYPE(lefto) != &Pattern_Type || Py_TYPE(righto) != &Pattern_Type) {
+        Py_RETURN_NOTIMPLEMENTED;
+    }
+    left = (PatternObject *)lefto;
+    right = (PatternObject *)righto;
+
+    /* The code sizes must be equal, not merely both non-zero: the memcmp()
+       below reads left->codesize items from both code arrays, so a size
+       mismatch would read past the end of the shorter array. */
+    cmp = (left->flags == right->flags
+           && left->isbytes == right->isbytes
+           && left->codesize == right->codesize);
+    if (cmp) {
+        /* Compare the code and the pattern because the same pattern can
+           produce different codes depending on the locale used to compile the
+           pattern when the re.LOCALE flag is used. Don't compare groups,
+           indexgroup nor groupindex: they are derived from the pattern. */
+        cmp = (memcmp(left->code, right->code,
+                      sizeof(left->code[0]) * left->codesize) == 0);
+    }
+    if (cmp) {
+        cmp = PyObject_RichCompareBool(left->pattern, right->pattern,
+                                       Py_EQ);
+        if (cmp < 0) {
+            /* the comparison of the pattern objects failed */
+            return NULL;
+        }
+    }
+    if (op == Py_NE) {
+        cmp = !cmp;
+    }
+    return PyBool_FromLong(cmp);
+}
+
 #include "clinic/_sre.c.h"
 
 static PyMethodDef pattern_methods[] = {
@@ -2693,7 +2754,7 @@ static PyTypeObject Pattern_Type = {
     0,                                  /* tp_as_number */
     0,                                  /* tp_as_sequence */
     0,                                  /* tp_as_mapping */
-    0,                                  /* tp_hash */
+    (hashfunc)pattern_hash,             /* tp_hash */
     0,                                  /* tp_call */
     0,                                  /* tp_str */
     0,                                  /* tp_getattro */
@@ -2703,7 +2764,7 @@ static PyTypeObject Pattern_Type = {
     pattern_doc,                        /* tp_doc */
     0,                                  /* tp_traverse */
     0,                                  /* tp_clear */
-    0,                                  /* tp_richcompare */
+    pattern_richcompare,                /* tp_richcompare */
     offsetof(PatternObject, weakreflist),       /* tp_weaklistoffset */
     0,                                  /* tp_iter */
     0,                                  /* tp_iternext */