granicus.if.org Git - python/commitdiff
String interning.
author Guido van Rossum <guido@python.org>
Sat, 18 Jan 1997 07:55:05 +0000 (07:55 +0000)
committer Guido van Rossum <guido@python.org>
Sat, 18 Jan 1997 07:55:05 +0000 (07:55 +0000)
Objects/dictobject.c
Objects/mappingobject.c
Objects/stringobject.c

index a8b18efb87d542411ddb8454f73eac972cf6e163..a8767d7e30a695a3376b22d7d639217206fe1a01 100644 (file)
@@ -87,6 +87,9 @@ typedef struct {
        int ma_fill;
        int ma_used;
        int ma_size;
+#ifdef INTERN_STRINGS
+       int ma_fast;
+#endif
        mappingentry *ma_table;
 } mappingobject;
 
@@ -106,6 +109,9 @@ newmappingobject()
        mp->ma_table = NULL;
        mp->ma_fill = 0;
        mp->ma_used = 0;
+#ifdef INTERN_STRINGS
+       mp->ma_fast = 1;
+#endif
        return (object *)mp;
 }
 
@@ -163,17 +169,40 @@ lookmapping(mp, key, hash)
        unsigned long sum;
        int incr;
        int size;
+#ifdef INTERN_STRINGS
+       int fast;
+#endif
 
        ep = &mp->ma_table[(unsigned long)hash%mp->ma_size];
        ekey = ep->me_key;
        if (ekey == NULL)
                return ep;
+#ifdef INTERN_STRINGS
+       if ((fast = mp->ma_fast)) {
+               object *ikey;
+               if (!is_stringobject(key) ||
+                   (ikey = ((stringobject *)key)->ob_sinterned) == NULL)
+                       fast = 0;
+               else
+                       key = ikey;
+       }
+#endif
        if (ekey == dummy)
                freeslot = ep;
-       else if (ep->me_hash == hash && cmpobject(ekey, key) == 0)
-               return ep;
-       else
+       else {
+#ifdef INTERN_STRINGS
+               if (fast) {
+                       if (ekey == key)
+                               return ep;
+               }
+               else
+#endif
+               {
+                       if (ep->me_hash == hash && cmpobject(ekey, key) == 0)
+                               return ep;
+               }
                freeslot = NULL;
+       }
 
        size = mp->ma_size;
        sum = hash;
@@ -184,6 +213,36 @@ lookmapping(mp, key, hash)
 
        end = mp->ma_table + size;
 
+#ifdef INTERN_STRINGS
+       if (fast) {
+               if (freeslot == NULL) {
+                       for (;;) {
+                               ep += incr;
+                               if (ep >= end)
+                                       ep -= size;
+                               ekey = ep->me_key;
+                               if (ekey == NULL || ekey == key)
+                                       return ep;
+                               if (ekey == dummy) {
+                                       freeslot = ep;
+                                       break;
+                               }
+                       }
+               }
+
+               for (;;) {
+                       ep += incr;
+                       if (ep >= end)
+                               ep -= size;
+                       ekey = ep->me_key;
+                       if (ekey == NULL)
+                               return freeslot;
+                       if (ekey == key)
+                               return ep;
+               }
+       }
+#endif
+
        if (freeslot == NULL) {
                for (;;) {
                        ep += incr;
@@ -339,13 +398,35 @@ mappinginsert(op, key, value)
                err_badcall();
                return -1;
        }
+       mp = (mappingobject *)op;
 #ifdef CACHE_HASH
-       if (!is_stringobject(key) || (hash = ((stringobject *) key)->ob_shash) == -1)
+       if (is_stringobject(key)) {
+#ifdef INTERN_STRINGS
+               if (((stringobject *)key)->ob_sinterned != NULL) {
+                       key = ((stringobject *)key)->ob_sinterned;
+                       hash = ((stringobject *)key)->ob_shash;
+               }
+               else
 #endif
-       hash = hashobject(key);
-       if (hash == -1)
-               return -1;
-       mp = (mappingobject *)op;
+               {
+                       hash = ((stringobject *)key)->ob_shash;
+                       if (hash == -1)
+                               hash = hashobject(key);
+#ifdef INTERN_STRINGS
+                       mp->ma_fast = 0;
+#endif
+               }
+       }
+       else
+#endif
+       {
+               hash = hashobject(key);
+               if (hash == -1)
+                       return -1;
+#ifdef INTERN_STRINGS
+               mp->ma_fast = 0;
+#endif
+       }
        /* if fill >= 2/3 size, resize */
        if (mp->ma_fill*3 >= mp->ma_size*2) {
                if (mappingresize(mp) != 0) {
@@ -907,16 +988,22 @@ setattro(v, name, value)
        object *name;
        object *value;
 {
+       int err;
+       INCREF(name);
+       PyString_InternInPlace(&name);
        if (v->ob_type->tp_setattro != NULL)
-               return (*v->ob_type->tp_setattro)(v, name, value);
-
-       if (name != last_name_object) {
-               XDECREF(last_name_object);
-               INCREF(name);
-               last_name_object = name;
-               last_name_char = getstringvalue(name);
+               err = (*v->ob_type->tp_setattro)(v, name, value);
+       else {
+               if (name != last_name_object) {
+                       XDECREF(last_name_object);
+                       INCREF(name);
+                       last_name_object = name;
+                       last_name_char = getstringvalue(name);
+               }
+               err = setattr(v, last_name_char, value);
        }
-       return setattr(v, last_name_char, value);
+       DECREF(name);
+       return err;
 }
 
 object *
@@ -931,6 +1018,7 @@ dictlookup(v, key)
                        last_name_char = NULL;
                        return NULL;
                }
+               PyString_InternInPlace(&last_name_object);
                last_name_char = getstringvalue(last_name_object);
        }
        return mappinglookup(v, last_name_object);
@@ -949,6 +1037,7 @@ dictinsert(v, key, item)
                        last_name_char = NULL;
                        return -1;
                }
+               PyString_InternInPlace(&last_name_object);
                last_name_char = getstringvalue(last_name_object);
        }
        return mappinginsert(v, last_name_object, item);
index a8b18efb87d542411ddb8454f73eac972cf6e163..a8767d7e30a695a3376b22d7d639217206fe1a01 100644 (file)
@@ -87,6 +87,9 @@ typedef struct {
        int ma_fill;
        int ma_used;
        int ma_size;
+#ifdef INTERN_STRINGS
+       int ma_fast;
+#endif
        mappingentry *ma_table;
 } mappingobject;
 
@@ -106,6 +109,9 @@ newmappingobject()
        mp->ma_table = NULL;
        mp->ma_fill = 0;
        mp->ma_used = 0;
+#ifdef INTERN_STRINGS
+       mp->ma_fast = 1;
+#endif
        return (object *)mp;
 }
 
@@ -163,17 +169,40 @@ lookmapping(mp, key, hash)
        unsigned long sum;
        int incr;
        int size;
+#ifdef INTERN_STRINGS
+       int fast;
+#endif
 
        ep = &mp->ma_table[(unsigned long)hash%mp->ma_size];
        ekey = ep->me_key;
        if (ekey == NULL)
                return ep;
+#ifdef INTERN_STRINGS
+       if ((fast = mp->ma_fast)) {
+               object *ikey;
+               if (!is_stringobject(key) ||
+                   (ikey = ((stringobject *)key)->ob_sinterned) == NULL)
+                       fast = 0;
+               else
+                       key = ikey;
+       }
+#endif
        if (ekey == dummy)
                freeslot = ep;
-       else if (ep->me_hash == hash && cmpobject(ekey, key) == 0)
-               return ep;
-       else
+       else {
+#ifdef INTERN_STRINGS
+               if (fast) {
+                       if (ekey == key)
+                               return ep;
+               }
+               else
+#endif
+               {
+                       if (ep->me_hash == hash && cmpobject(ekey, key) == 0)
+                               return ep;
+               }
                freeslot = NULL;
+       }
 
        size = mp->ma_size;
        sum = hash;
@@ -184,6 +213,36 @@ lookmapping(mp, key, hash)
 
        end = mp->ma_table + size;
 
+#ifdef INTERN_STRINGS
+       if (fast) {
+               if (freeslot == NULL) {
+                       for (;;) {
+                               ep += incr;
+                               if (ep >= end)
+                                       ep -= size;
+                               ekey = ep->me_key;
+                               if (ekey == NULL || ekey == key)
+                                       return ep;
+                               if (ekey == dummy) {
+                                       freeslot = ep;
+                                       break;
+                               }
+                       }
+               }
+
+               for (;;) {
+                       ep += incr;
+                       if (ep >= end)
+                               ep -= size;
+                       ekey = ep->me_key;
+                       if (ekey == NULL)
+                               return freeslot;
+                       if (ekey == key)
+                               return ep;
+               }
+       }
+#endif
+
        if (freeslot == NULL) {
                for (;;) {
                        ep += incr;
@@ -339,13 +398,35 @@ mappinginsert(op, key, value)
                err_badcall();
                return -1;
        }
+       mp = (mappingobject *)op;
 #ifdef CACHE_HASH
-       if (!is_stringobject(key) || (hash = ((stringobject *) key)->ob_shash) == -1)
+       if (is_stringobject(key)) {
+#ifdef INTERN_STRINGS
+               if (((stringobject *)key)->ob_sinterned != NULL) {
+                       key = ((stringobject *)key)->ob_sinterned;
+                       hash = ((stringobject *)key)->ob_shash;
+               }
+               else
 #endif
-       hash = hashobject(key);
-       if (hash == -1)
-               return -1;
-       mp = (mappingobject *)op;
+               {
+                       hash = ((stringobject *)key)->ob_shash;
+                       if (hash == -1)
+                               hash = hashobject(key);
+#ifdef INTERN_STRINGS
+                       mp->ma_fast = 0;
+#endif
+               }
+       }
+       else
+#endif
+       {
+               hash = hashobject(key);
+               if (hash == -1)
+                       return -1;
+#ifdef INTERN_STRINGS
+               mp->ma_fast = 0;
+#endif
+       }
        /* if fill >= 2/3 size, resize */
        if (mp->ma_fill*3 >= mp->ma_size*2) {
                if (mappingresize(mp) != 0) {
@@ -907,16 +988,22 @@ setattro(v, name, value)
        object *name;
        object *value;
 {
+       int err;
+       INCREF(name);
+       PyString_InternInPlace(&name);
        if (v->ob_type->tp_setattro != NULL)
-               return (*v->ob_type->tp_setattro)(v, name, value);
-
-       if (name != last_name_object) {
-               XDECREF(last_name_object);
-               INCREF(name);
-               last_name_object = name;
-               last_name_char = getstringvalue(name);
+               err = (*v->ob_type->tp_setattro)(v, name, value);
+       else {
+               if (name != last_name_object) {
+                       XDECREF(last_name_object);
+                       INCREF(name);
+                       last_name_object = name;
+                       last_name_char = getstringvalue(name);
+               }
+               err = setattr(v, last_name_char, value);
        }
-       return setattr(v, last_name_char, value);
+       DECREF(name);
+       return err;
 }
 
 object *
@@ -931,6 +1018,7 @@ dictlookup(v, key)
                        last_name_char = NULL;
                        return NULL;
                }
+               PyString_InternInPlace(&last_name_object);
                last_name_char = getstringvalue(last_name_object);
        }
        return mappinglookup(v, last_name_object);
@@ -949,6 +1037,7 @@ dictinsert(v, key, item)
                        last_name_char = NULL;
                        return -1;
                }
+               PyString_InternInPlace(&last_name_object);
                last_name_char = getstringvalue(last_name_object);
        }
        return mappinginsert(v, last_name_object, item);
index 048b83cd68222bc92de3cb8ab30ae0386105d683..d656fa1006b0f5a17853079d415f086d49afbe5d 100644 (file)
@@ -97,6 +97,9 @@ newsizedstringobject(str, size)
        op->ob_size = size;
 #ifdef CACHE_HASH
        op->ob_shash = -1;
+#endif
+#ifdef INTERN_STRINGS
+       op->ob_sinterned = NULL;
 #endif
        NEWREF(op);
        if (str != NULL)
@@ -144,6 +147,9 @@ newstringobject(str)
        op->ob_size = size;
 #ifdef CACHE_HASH
        op->ob_shash = -1;
+#endif
+#ifdef INTERN_STRINGS
+       op->ob_sinterned = NULL;
 #endif
        NEWREF(op);
        strcpy(op->ob_sval, str);
@@ -303,6 +309,9 @@ string_concat(a, bb)
        op->ob_size = size;
 #ifdef CACHE_HASH
        op->ob_shash = -1;
+#endif
+#ifdef INTERN_STRINGS
+       op->ob_sinterned = NULL;
 #endif
        NEWREF(op);
        memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
@@ -335,6 +344,9 @@ string_repeat(a, n)
        op->ob_size = size;
 #ifdef CACHE_HASH
        op->ob_shash = -1;
+#endif
+#ifdef INTERN_STRINGS
+       op->ob_sinterned = NULL;
 #endif
        NEWREF(op);
        for (i = 0; i < size; i += a->ob_size)
@@ -462,6 +474,13 @@ typeobject Stringtype = {
        &string_as_sequence,    /*tp_as_sequence*/
        0,              /*tp_as_mapping*/
        (hashfunc)string_hash, /*tp_hash*/
+       0,              /*tp_call*/
+       0,              /*tp_str*/
+       0,              /*tp_getattro*/
+       0,              /*tp_setattro*/
+       0,              /*tp_xxx3*/
+       0,              /*tp_xxx4*/
+       0,              /*tp_doc*/
 };
 
 void
@@ -928,3 +947,59 @@ formatstring(format, args)
                DECREF(args);
        return NULL;
 }
+
+
+#ifdef INTERN_STRINGS
+
+/* Table of interned strings: maps each canonical string object to
+   itself.  Created lazily by PyString_InternInPlace(). */
+static PyObject *interned;
+
+/* Intern the string object *p in place.
+
+   p must point to a non-NULL string object; anything else is a fatal
+   error.  If an equal string has already been interned, *p is replaced
+   by that canonical object: a new reference to it is stored in *p and
+   the reference to the original object is released.  Otherwise *p
+   itself is entered into the table and becomes the canonical copy.
+
+   This function cannot report errors.  If the table cannot be created
+   or updated, *p is left un-interned and the pending exception is
+   cleared, so callers never see a stray error from a void function. */
+void
+PyString_InternInPlace(p)
+       PyObject **p;
+{
+       register PyStringObject *s = (PyStringObject *)(*p);
+       PyObject *t;
+       if (s == NULL || !PyString_Check(s))
+               Py_FatalError("PyString_InternInPlace: strings only please!");
+       if ((t = s->ob_sinterned) != NULL) {
+               /* s already knows its canonical copy. */
+               if (t == (PyObject *)s)
+                       return; /* s is the canonical copy */
+               Py_INCREF(t);
+               *p = t;
+               Py_DECREF(s);
+               return;
+       }
+       if (interned == NULL) {
+               interned = PyDict_New();
+               if (interned == NULL) {
+                       PyErr_Clear(); /* Don't leave an exception */
+                       return;
+               }
+               /* Force slow lookups: inserting a non-string key turns
+                  off the dictionary's fast (pointer-compare) mode. */
+               if (PyDict_SetItem(interned, Py_None, Py_None) != 0) {
+                       /* Don't keep a half-initialized table around;
+                          a later call will try again. */
+                       Py_DECREF(interned);
+                       interned = NULL;
+                       PyErr_Clear();
+                       return;
+               }
+       }
+       if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
+               /* An equal string is already interned: remember it in s
+                  and hand the caller the canonical object instead. */
+               Py_INCREF(t);
+               *p = s->ob_sinterned = t;
+               Py_DECREF(s);
+               return;
+       }
+       /* First time this value is seen: s becomes the canonical copy. */
+       t = (PyObject *)s;
+       if (PyDict_SetItem(interned, t, t) == 0) {
+               s->ob_sinterned = t;
+               return;
+       }
+       PyErr_Clear();
+}
+
+
+/* Build a string object from the C string cp and intern it.
+   Returns the resulting object, or NULL if the string object could
+   not be created. */
+PyObject *
+PyString_InternFromString(cp)
+       const char *cp;
+{
+       PyObject *str = PyString_FromString(cp);
+       if (str != NULL)
+               PyString_InternInPlace(&str);
+       return str;
+}
+
+#endif