typedef struct lru_cache_object {
lru_list_elem root; /* includes PyObject_HEAD */
- Py_ssize_t maxsize;
- PyObject *maxsize_O;
- PyObject *func;
lru_cache_ternaryfunc wrapper;
+ int typed;
PyObject *cache;
+ Py_ssize_t hits;
+ PyObject *func;
+ Py_ssize_t maxsize;
+ Py_ssize_t misses;
PyObject *cache_info_type;
- Py_ssize_t misses, hits;
- int typed;
PyObject *dict;
- int full;
} lru_cache_object;
static PyTypeObject lru_cache_type;
/* NOTE(review): interior fragment of lru_cache_make_key() -- the function
   header and the typed/kwds key-building path are not visible in this
   hunk; recover them from the full file. */
/* short path, key will match args anyway, which is a tuple */
if (!typed && !kwds) {
+ if (PyTuple_GET_SIZE(args) == 1) {
+ key = PyTuple_GET_ITEM(args, 0);
+ if (PyUnicode_CheckExact(key) || PyLong_CheckExact(key)) {
+ /* For common scalar keys, save space by
+ dropping the enclosing args tuple */
+ Py_INCREF(key);
+ return key;
+ }
+ }
Py_INCREF(args);
return args;
}
}
static void
-lru_cache_extricate_link(lru_list_elem *link)
+lru_cache_extract_link(lru_list_elem *link)
{
- link->prev->next = link->next;
- link->next->prev = link->prev;
+ lru_list_elem *link_prev = link->prev;
+ lru_list_elem *link_next = link->next;
+ link_prev->next = link->next;
+ link_next->prev = link->prev;
}
static void
/* NOTE(review): only the tail of lru_cache_append_link() is visible here;
   the parameter list and the earlier pointer fix-ups are missing from
   this hunk -- do not compile this fragment as-is. */
link->next = root;
}
+static void
+lru_cache_prepend_link(lru_cache_object *self, lru_list_elem *link)
+{
+ lru_list_elem *root = &self->root;
+ lru_list_elem *first = root->next;
+ first->prev = root->next = link;
+ link->prev = root;
+ link->next = first;
+}
+
+/* General note on reentrancy:
+
+ There are four dictionary calls in the bounded_lru_cache_wrapper():
+ 1) The initial check for a cache match. 2) The post user-function
+ check for a cache match. 3) The deletion of the oldest entry.
+ 4) The addition of the newest entry.
+
+ In all four calls, we have a known hash which lets us avoid a call
+ to __hash__(). That leaves only __eq__ as a possible source of a
+ reentrant call.
+
+ The __eq__ method call is always made for a cache hit (dict access #1).
+ Accordingly, we have to make sure not to modify the cache state prior to
+ this call.
+
+ The __eq__ method call is never made for the deletion (dict access #3)
+ because it is an identity match.
+
+ For the other two accesses (#2 and #4), calls to __eq__ only occur
+ when some other entry happens to have an exactly matching hash (all
+ 64-bits). Though rare, this can happen, so we have to make sure to
+ either call it at the top of its code path before any cache
+ state modifications (dict access #2) or be prepared to restore
+ invariants at the end of the code path (dict access #4).
+
+ Another possible source of reentrancy is a decref which can trigger
+ arbitrary code execution. To make the code easier to reason about,
+ the decrefs are deferred to the end of each possible code path
+ so that we know the cache is in a consistent state.
+ */
+
/* Bounded variant of the lru_cache wrapper: serve a hit from the cache
   dict (moving its link to the most-recently-used end), otherwise call
   the user function and insert the result, reusing and re-keying the
   oldest link once the cache holds maxsize entries.
   NOTE(review): several context lines are missing from this hunk -- e.g.
   the NULL check and PyObject_Hash() call after lru_cache_make_key(),
   the PyErr_Occurred() branch after the first dict lookup, and the
   out-of-memory branch after PyObject_New().  Recover them from the
   full file; this fragment will not compile as-is. */
static PyObject *
bounded_lru_cache_wrapper(lru_cache_object *self, PyObject *args, PyObject *kwds)
{
lru_list_elem *link;
- PyObject *key, *result;
+ PyObject *key, *result, *testresult;
Py_hash_t hash;
key = lru_cache_make_key(args, kwds, self->typed);
/* NOTE(review): missing lines here: `if (!key)` guard and the
   PyObject_Hash(key) computation that fills `hash`. */
return NULL;
}
link = (lru_list_elem *)_PyDict_GetItem_KnownHash(self->cache, key, hash);
- if (link) {
- lru_cache_extricate_link(link);
+ if (link != NULL) {
+ lru_cache_extract_link(link);
lru_cache_append_link(self, link);
- self->hits++;
result = link->result;
+ self->hits++;
Py_INCREF(result);
Py_DECREF(key);
return result;
/* NOTE(review): missing lines here: end of the hit branch and the
   `if (PyErr_Occurred())` check for a failed first lookup. */
Py_DECREF(key);
return NULL;
}
+ self->misses++;
result = PyObject_Call(self->func, args, kwds);
if (!result) {
Py_DECREF(key);
return NULL;
}
- if (self->full && self->root.next != &self->root) {
- /* Use the oldest item to store the new key and result. */
- PyObject *oldkey, *oldresult, *popresult;
- /* Extricate the oldest item. */
- link = self->root.next;
- lru_cache_extricate_link(link);
- /* Remove it from the cache.
- The cache dict holds one reference to the link,
- and the linked list holds yet one reference to it. */
- popresult = _PyDict_Pop_KnownHash(self->cache,
- link->key, link->hash,
- Py_None);
- if (popresult == Py_None) {
- /* Getting here means that this same key was added to the
- cache while the lock was released. Since the link
- update is already done, we need only return the
- computed result and update the count of misses. */
- Py_DECREF(popresult);
- Py_DECREF(link);
- Py_DECREF(key);
- }
- else if (popresult == NULL) {
- lru_cache_append_link(self, link);
- Py_DECREF(key);
- Py_DECREF(result);
- return NULL;
- }
- else {
- Py_DECREF(popresult);
- /* Keep a reference to the old key and old result to
- prevent their ref counts from going to zero during the
- update. That will prevent potentially arbitrary object
- clean-up code (i.e. __del__) from running while we're
- still adjusting the links. */
- oldkey = link->key;
- oldresult = link->result;
-
- link->hash = hash;
- link->key = key;
- link->result = result;
- if (_PyDict_SetItem_KnownHash(self->cache, key, (PyObject *)link,
- hash) < 0) {
- Py_DECREF(link);
- Py_DECREF(oldkey);
- Py_DECREF(oldresult);
- return NULL;
- }
- lru_cache_append_link(self, link);
- Py_INCREF(result); /* for return */
- Py_DECREF(oldkey);
- Py_DECREF(oldresult);
- }
- } else {
- /* Put result in a new link at the front of the queue. */
+ testresult = _PyDict_GetItem_KnownHash(self->cache, key, hash);
+ if (testresult != NULL) {
+ /* Getting here means that this same key was added to the cache
+ during the PyObject_Call(). Since the link update is already
+ done, we need only return the computed result. */
+ Py_DECREF(key);
+ return result;
+ }
+ if (PyErr_Occurred()) {
+ /* This is an unusual case since this same lookup
+ did not previously trigger an error during lookup.
+ Treat it the same as an error in user function
+ and return with the error set. */
+ Py_DECREF(key);
+ Py_DECREF(result);
+ return NULL;
+ }
+ /* This is the normal case. The new key wasn't found before
+ user function call and it is still not there. So we
+ proceed normally and update the cache with the new result. */
+
+ assert(self->maxsize > 0);
+ if (PyDict_GET_SIZE(self->cache) < self->maxsize ||
+ self->root.next == &self->root)
+ {
+ /* Cache is not full, so put the result in a new link */
link = (lru_list_elem *)PyObject_New(lru_list_elem,
&lru_list_elem_type);
if (link == NULL) {
/* NOTE(review): missing lines here: the out-of-memory branch body
   (decref key/result, return NULL) and the closing brace. */
link->hash = hash;
link->key = key;
link->result = result;
+ /* What is really needed here is a SetItem variant with a "no clobber"
+ option. If the __eq__ call triggers a reentrant call that adds
+ this same key, then this setitem call will update the cache dict
+ with this new link, leaving the old link as an orphan (i.e. not
+ having a cache dict entry that refers to it). */
if (_PyDict_SetItem_KnownHash(self->cache, key, (PyObject *)link,
hash) < 0) {
Py_DECREF(link);
}
lru_cache_append_link(self, link);
Py_INCREF(result); /* for return */
- self->full = (PyDict_GET_SIZE(self->cache) >= self->maxsize);
+ return result;
}
- self->misses++;
+ /* Since the cache is full, we need to evict an old key and add
+ a new key. Rather than free the old link and allocate a new
+ one, we reuse the link for the new key and result and move it
+ to front of the cache to mark it as recently used.
+
+ We try to assure all code paths (including errors) leave all
+ of the links in place. Either the link is successfully
+ updated and moved or it is restored to its old position.
+ However if an unrecoverable error is found, it doesn't
+ make sense to reinsert the link, so we leave it out
+ and the cache will no longer register as full.
+ */
+ PyObject *oldkey, *oldresult, *popresult;
+
+ /* Extract the oldest item. */
+ assert(self->root.next != &self->root);
+ link = self->root.next;
+ lru_cache_extract_link(link);
+ /* Remove it from the cache.
+ The cache dict holds one reference to the link,
+ and the linked list holds yet one reference to it. */
+ popresult = _PyDict_Pop_KnownHash(self->cache, link->key,
+ link->hash, Py_None);
+ if (popresult == Py_None) {
+ /* Getting here means that the user function call or another
+ thread has already removed the old key from the dictionary.
+ This link is now an orphan. Since we don't want to leave the
+ cache in an inconsistent state, we don't restore the link. */
+ Py_DECREF(popresult);
+ Py_DECREF(link);
+ Py_DECREF(key);
+ return result;
+ }
+ if (popresult == NULL) {
+ /* An error arose while trying to remove the oldest key (the one
+ being evicted) from the cache. We restore the link to its
+ original position as the oldest link. Then we allow the
+ error to propagate upward; treating it the same as an error
+ arising in the user function. */
+ lru_cache_prepend_link(self, link);
+ Py_DECREF(key);
+ Py_DECREF(result);
+ return NULL;
+ }
+ /* Keep a reference to the old key and old result to prevent their
+ ref counts from going to zero during the update. That will
+ prevent potentially arbitrary object clean-up code (i.e. __del__)
+ from running while we're still adjusting the links. */
+ oldkey = link->key;
+ oldresult = link->result;
+
+ link->hash = hash;
+ link->key = key;
+ link->result = result;
+ /* Note: The link is being added to the cache dict without the
+ prev and next fields set to valid values. We have to wait
+ for successful insertion in the cache dict before adding the
+ link to the linked list. Otherwise, the potentially reentrant
+ __eq__ call could cause the then orphan link to be visited. */
+ if (_PyDict_SetItem_KnownHash(self->cache, key, (PyObject *)link,
+ hash) < 0) {
+ /* Somehow the cache dict update failed. We no longer can
+ restore the old link. Let the error propagate upward and
+ leave the cache short one link. */
+ Py_DECREF(popresult);
+ Py_DECREF(link);
+ Py_DECREF(oldkey);
+ Py_DECREF(oldresult);
+ return NULL;
+ }
+ lru_cache_append_link(self, link);
+ Py_INCREF(result); /* for return */
+ Py_DECREF(popresult);
+ Py_DECREF(oldkey);
+ Py_DECREF(oldresult);
return result;
}
/* NOTE(review): fragment of the lru_cache constructor (header, argument
   parsing, and the non-zero-maxsize wrapper selection lines are not in
   this hunk).  A negative maxsize is clamped to 0, which selects the
   uncached wrapper. */
maxsize = PyNumber_AsSsize_t(maxsize_O, PyExc_OverflowError);
if (maxsize == -1 && PyErr_Occurred())
return NULL;
+ if (maxsize < 0) {
+ maxsize = 0;
+ }
if (maxsize == 0)
wrapper = uncached_lru_cache_wrapper;
else
return NULL;
}
- obj->cache = cachedict;
/* Self-referencing root pointers == empty circular LRU list. */
obj->root.prev = &obj->root;
obj->root.next = &obj->root;
- obj->maxsize = maxsize;
- Py_INCREF(maxsize_O);
- obj->maxsize_O = maxsize_O;
+ obj->wrapper = wrapper;
+ obj->typed = typed;
+ obj->cache = cachedict;
Py_INCREF(func);
obj->func = func;
- obj->wrapper = wrapper;
obj->misses = obj->hits = 0;
- obj->typed = typed;
+ obj->maxsize = maxsize;
Py_INCREF(cache_info_type);
obj->cache_info_type = cache_info_type;
-
return (PyObject *)obj;
}
/* NOTE(review): body of the deallocator (function header not shown).
   Untrack from the GC first, unlink the LRU list, drop all owned
   references, then clear the detached list and free via tp_free. */
PyObject_GC_UnTrack(obj);
list = lru_cache_unlink_list(obj);
- Py_XDECREF(obj->maxsize_O);
- Py_XDECREF(obj->func);
Py_XDECREF(obj->cache);
- Py_XDECREF(obj->dict);
+ Py_XDECREF(obj->func);
Py_XDECREF(obj->cache_info_type);
+ Py_XDECREF(obj->dict);
lru_cache_clear_list(list);
Py_TYPE(obj)->tp_free(obj);
}
/* Implement the wrapper's cache_info() method: build a
   CacheInfo(hits, misses, maxsize, currsize) record through the stored
   namedtuple factory.  maxsize == -1 marks an unbounded cache and is
   reported as None, matching functools.lru_cache(maxsize=None). */
static PyObject *
lru_cache_cache_info(lru_cache_object *self, PyObject *unused)
{
    if (self->maxsize == -1) {
        /* "n" formats a Py_ssize_t; "O" passes Py_None for maxsize. */
        return PyObject_CallFunction(self->cache_info_type, "nnOn",
                                     self->hits, self->misses, Py_None,
                                     PyDict_GET_SIZE(self->cache));
    }
    return PyObject_CallFunction(self->cache_info_type, "nnnn",
                                 self->hits, self->misses, self->maxsize,
                                 PyDict_GET_SIZE(self->cache));
}
/* NOTE(review): presumably the body of the cache_clear() method (the
   function header is outside this hunk) -- reset the statistics, empty
   the cache dict, then free the detached link list last so arbitrary
   __del__ code runs only after the cache is back in a consistent
   (empty) state. */
{
lru_list_elem *list = lru_cache_unlink_list(self);
self->hits = self->misses = 0;
- self->full = 0;
PyDict_Clear(self->cache);
lru_cache_clear_list(list);
Py_RETURN_NONE;
/* NOTE(review): tail of the GC traverse handler; the loop header walking
   the linked list is missing from this hunk.  Each owned reference is
   reported with Py_VISIT so the cycle collector can see it. */
Py_VISIT(link->result);
link = next;
}
- Py_VISIT(self->maxsize_O);
Py_VISIT(self->func);
Py_VISIT(self->cache);
Py_VISIT(self->cache_info_type);
/* NOTE(review): start of the GC clear handler (the return-type line above
   and the closing lines are outside this hunk).  Owned references are
   dropped with Py_CLEAR so reference cycles can be broken. */
lru_cache_tp_clear(lru_cache_object *self)
{
lru_list_elem *list = lru_cache_unlink_list(self);
- Py_CLEAR(self->maxsize_O);
Py_CLEAR(self->func);
Py_CLEAR(self->cache);
Py_CLEAR(self->cache_info_type);