import bisect
import time  # no longer used by CacheDict; import retained for compatibility


# Sentinel that lets CacheDict.pop() tell "no default given" from "default=None".
_MISSING = object()


class CacheDict(dict):
    """A dictionary with a bounded number of entries (LRU eviction).

    This class works like a basic dictionary except that you can put a
    constraint on its size.  Once that size is reached, the key that was
    inserted or accessed (via ``d[key]``) the least recently is removed
    every time a new key is added.

    Recency is tracked with a per-instance logical clock (a counter that
    grows by one on every insert/access) rather than wall-clock time, so
    two operations can never share a timestamp and keys are never compared
    with each other; any hashable key works.

    Notes:
        * ``d.get(key)`` and ``key in d`` are plain ``dict`` operations and
          do not count as an access.
        * ``pop``, ``popitem``, ``clear``, ``update`` and ``setdefault`` are
          overridden so the recency bookkeeping stays in sync with the
          dictionary contents.
        * A ``max_size`` smaller than 1 behaves like 1: the most recently
          stored entry is always kept.
    """

    def __init__(self, max_size=10000):
        """Build an empty cache holding at most ``max_size`` entries."""
        dict.__init__(self)
        # once max_size is reached, the oldest cache entry will be pushed
        # out to make room for each new one.
        self.max_size = max_size
        # _times_dict maps each key to the logical timestamp of its last use.
        self._times_dict = {}
        # _times_list stores (timestamp, key) pairs, sorted oldest first.
        self._times_list = []
        # _clock is the last logical timestamp handed out.
        self._clock = 0

    def _timestamp_index(self, key):
        """Return the index of ``key``'s entry in ``self._times_list``.

        Raises:
            KeyError: if ``key`` is not tracked by the cache.
            LookupError: if the bookkeeping is inconsistent and the entry
                cannot be found in the sorted list.
        """
        # A missing key surfaces here as KeyError, as for a regular dict.
        item = (self._times_dict[key], key)
        # Timestamps are unique, so the search is decided by the timestamp
        # alone and never has to order two different keys.
        i = bisect.bisect_left(self._times_list, item)
        # make sure the index we are returning really is valid
        if i >= len(self._times_list) or self._times_list[i] != item:
            # (key,) keeps the formatting correct when key is itself a tuple
            raise LookupError("key %r was not found" % (key,))
        return i

    def __getitem__(self, key):
        """Implement ``d[key]``; a successful lookup marks the key as used."""
        value = dict.__getitem__(self, key)
        # re-store the value to refresh the timestamp on this key
        self[key] = value
        return value

    def __setitem__(self, key, value):
        """Implement ``d[key] = value``, evicting old entries if needed."""
        # drop any previous timestamp entry of the key to make way for the
        # new one
        if key in self._times_dict:
            del self._times_list[self._timestamp_index(key)]

        # remove the oldest keys until there is room for this one.  The
        # evicted key gets its own name so it cannot clobber ``key``.
        while self._times_list and len(self._times_list) >= self.max_size:
            oldest_key = self._times_list[0][1]
            del self[oldest_key]

        # store the value and record a fresh timestamp for the key
        self._clock += 1
        stamp = self._clock
        dict.__setitem__(self, key, value)
        self._times_dict[key] = stamp
        # the clock only grows, so appending keeps the list sorted
        self._times_list.append((stamp, key))

    def __delitem__(self, key):
        """Implement ``del d[key]``; raises ``KeyError`` for unknown keys."""
        # the key has to be removed from all three data structures
        del self._times_list[self._timestamp_index(key)]
        del self._times_dict[key]
        dict.__delitem__(self, key)

    def pop(self, key, default=_MISSING):
        """Remove ``key`` and return its value.

        Returns ``default`` if the key is missing and a default was given;
        raises ``KeyError`` otherwise.
        """
        try:
            value = dict.__getitem__(self, key)
        except KeyError:
            if default is _MISSING:
                raise
            return default
        del self[key]
        return value

    def popitem(self):
        """Remove and return a ``(key, value)`` pair as ``dict.popitem``."""
        key, value = dict.popitem(self)
        # dict.popitem bypasses __delitem__, so drop the bookkeeping here
        del self._times_list[self._timestamp_index(key)]
        del self._times_dict[key]
        return key, value

    def clear(self):
        """Remove every entry together with its recency bookkeeping."""
        dict.clear(self)
        self._times_dict.clear()
        del self._times_list[:]

    def update(self, *args, **kwargs):
        """Store several items at once, honouring the size limit.

        Accepts the same arguments as ``dict.update``.
        """
        # dict.update would bypass __setitem__, so insert one by one
        for key, value in dict(*args, **kwargs).items():
            self[key] = value

    def setdefault(self, key, default=None):
        """Return ``d[key]``, storing ``default`` first if key is missing."""
        if key in self:
            return self[key]
        self[key] = default
        return default