]> granicus.if.org Git - python/commitdiff
Subclasses of string can no longer be interned. The semantics of
author Jeremy Hylton <jeremy@alum.mit.edu>
Sat, 7 Aug 2004 19:20:05 +0000 (19:20 +0000)
committer Jeremy Hylton <jeremy@alum.mit.edu>
Sat, 7 Aug 2004 19:20:05 +0000 (19:20 +0000)
interning were not clear here -- a subclass could be mutable, for
example -- and had bugs.  Explicitly interning a subclass of string
via intern() will raise a TypeError.  Internal operations that attempt
to intern a string subclass will have no effect.

Added a few tests to test_builtin that include the old buggy code and
verify that calls like PyObject_SetAttr() don't fail.  Perhaps these
tests should have gone in test_string.

Lib/test/test_builtin.py
Misc/NEWS
Objects/stringobject.c
Python/bltinmodule.c

index bc5afdcb41e190ed17a09c103e503fdc08873b61..8e3a925032d4420c8665d289f6ba24204b68ac65 100644 (file)
@@ -608,6 +608,23 @@ class BuiltinTest(unittest.TestCase):
         s2 = s.swapcase().swapcase()
         self.assert_(intern(s2) is s)
 
+        # Subclasses of string can't be interned, because they
+        # provide too much opportunity for insane things to happen.
+        # We don't want them in the interned dict and if they aren't
+        # actually interned, we don't want to create the appearance
+        # that they are by allowing intern() to succeed.
+        class S(str):
+            def __hash__(self):
+                return 123
+
+        self.assertRaises(TypeError, intern, S("abc"))
+
+        # It's still safe to pass these strings to routines that
+        # call intern internally, e.g. PyObject_SetAttr().
+        s = S("abc")
+        setattr(s, s, s)
+        self.assertEqual(getattr(s, s), s)
+
     def test_iter(self):
         self.assertRaises(TypeError, iter)
         self.assertRaises(TypeError, iter, 42, 42)
index 9f80274a16fd25404fc1663456b2da3555a797ff..7550076ea48bd2c70507f41420cbae57ac4c51a8 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,12 @@ What's New in Python 2.4 alpha 3?
 Core and builtins
 -----------------
 
+Subclasses of string can no longer be interned.  The semantics of
+interning were not clear here -- a subclass could be mutable, for
+example -- and had bugs.  Explicitly interning a subclass of string
+via intern() will raise a TypeError.  Internal operations that attempt
+to intern a string subclass will have no effect.
+
 Extension modules
 -----------------
 
index 29562a9a8f317259dc65da79119f1745108275e6..f29929b0ec5a88536adba15026b1a8c494422e13 100644 (file)
@@ -4313,6 +4313,10 @@ PyString_InternInPlace(PyObject **p)
        PyObject *t;
        if (s == NULL || !PyString_Check(s))
                Py_FatalError("PyString_InternInPlace: strings only please!");
+       /* If it's a string subclass, we don't really know what putting
+          it in the interned dict might do. */
+       if (!PyString_CheckExact(s))
+               return;
        if (PyString_CHECK_INTERNED(s))
                return;
        if (interned == NULL) {
@@ -4322,36 +4326,22 @@ PyString_InternInPlace(PyObject **p)
                        return;
                }
        }
-       if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
+       t = PyDict_GetItem(interned, (PyObject *)s);
+       if (t) {
                Py_INCREF(t);
                Py_DECREF(*p);
                *p = t;
                return;
        }
-       /* Ensure that only true string objects appear in the intern dict */
-       if (!PyString_CheckExact(s)) {
-               t = PyString_FromStringAndSize(PyString_AS_STRING(s),
-                                               PyString_GET_SIZE(s));
-               if (t == NULL) {
-                       PyErr_Clear();
-                       return;
-               }
-       } else {
-               t = (PyObject*) s;
-               Py_INCREF(t);
-       }
 
-       if (PyDict_SetItem(interned, t, t) == 0) {
-               /* The two references in interned are not counted by
-               refcnt.  The string deallocator will take care of this */
-               ((PyObject *)t)->ob_refcnt-=2;
-               PyString_CHECK_INTERNED(t) = SSTATE_INTERNED_MORTAL;
-               Py_DECREF(*p);
-               *p = t;
+       if (PyDict_SetItem(interned, s, s) < 0) {
+               PyErr_Clear();
                return;
        }
-       Py_DECREF(t);
-       PyErr_Clear();
+       /* The two references in interned are not counted by refcnt.
+          The string deallocator will take care of this */
+       (*p)->ob_refcnt -= 2;
+       PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
 }
 
 void
index 8ff733c4d8e76d8863e6365b636387cadce9a3fe..04fcf597b1ae9af8bed4704779cb096d810be62d 100644 (file)
@@ -1035,6 +1035,11 @@ builtin_intern(PyObject *self, PyObject *args)
        PyObject *s;
        if (!PyArg_ParseTuple(args, "S:intern", &s))
                return NULL;
+       if (!PyString_CheckExact(s)) {
+               PyErr_SetString(PyExc_TypeError,
+                               "can't intern subclass of string");
+               return NULL;
+       }
        Py_INCREF(s);
        PyString_InternInPlace(&s);
        return s;