import bisect
import time


class CacheDict(dict):
    """A dictionary with a bounded number of entries and LRU eviction.

    This class works like a basic dictionary except that you can put a
    constraint on its size.  Once that size is reached, the key that was
    inserted or accessed the least recently is removed every time a new
    key is added.

    Bookkeeping is kept in two private structures next to the dict itself:

    * ``_times_dict`` maps each key to the timestamp of its last use.
    * ``_times_list`` holds ``(timestamp, key)`` pairs in sorted order,
      oldest first, so the eviction candidate is always at index 0.

    NOTE: only ``cache[key]``, ``cache[key] = value`` and ``del cache[key]``
    are overridden here.  Inherited ``dict`` methods that mutate the mapping
    without going through those hooks (``update``, ``pop``, ``setdefault``,
    ``clear``, ...) do not update the bookkeeping.
    """

    def __init__(self, max_size=1000000):
        """CacheDict(max_size=1000000): build an empty cache.

        max_size -- number of entries the cache may hold; once it is
                    reached, the oldest entry is pushed out to make room
                    for each new one.
        """
        self.max_size = max_size
        dict.__init__(self)
        # _times_dict maps keys to timestamps
        self._times_dict = {}
        # _times_list stores (timestamp, key) pairs in sorted order
        # (oldest first)
        self._times_list = []

    def timestamp(self, key):
        """Return the index of key's ``(timestamp, key)`` entry.

        Despite its name, the return value is the position of the entry in
        the sorted ``_times_list``, not the timestamp itself.

        Raises KeyError if ``key`` has no recorded timestamp, and
        LookupError if the recorded entry cannot be found in the list.
        """
        # A plain lookup (rather than an assert) so that a missing key is
        # reported as KeyError whether or not Python runs with -O.
        item = (self._times_dict[key], key)
        # look for the item in the (sorted) list
        i = bisect.bisect_left(self._times_list, item)
        # bisect may return len(list); check the bound before indexing so an
        # inconsistent state surfaces as LookupError, not IndexError
        if i >= len(self._times_list) or self._times_list[i] != item:
            raise LookupError(key)
        return i

    def _next_timestamp(self):
        """Return a timestamp strictly newer than every recorded one."""
        now = time.time()
        if not self._times_list:
            return now
        newest = self._times_list[-1][0]
        if now > newest:
            return now
        # The clock returned a value that is not newer than the newest
        # entry (coarse timer resolution or a backwards clock step).  Nudge
        # past the newest entry so that recency order is preserved and the
        # sorted list never has to fall back to comparing keys.
        bump = 1e-6
        while newest + bump <= newest:
            bump *= 2
        return newest + bump

    def __getitem__(self, key):
        """Return the value for ``key`` and mark it as most recently used.

        Raises KeyError if ``key`` is not in the cache.
        """
        value = dict.__getitem__(self, key)
        # re-storing the value refreshes the timestamp on this key
        self[key] = value
        return value

    def __setitem__(self, key, value):
        """Store ``value`` under ``key``, evicting old entries if needed."""
        # delete any old instance of the key to make way for the new
        if key in self:
            del self._times_list[self.timestamp(key)]
        # Remove old keys until we have enough space to add this one.  The
        # evicted key gets its own name so it cannot clobber ``key``, and
        # the emptiness check keeps a non-positive max_size from indexing
        # into an empty list.
        while self._times_list and len(self._times_list) >= self.max_size:
            oldest_key = self._times_list[0][1]
            del self[oldest_key]
        # add this key, create a timestamp, and update our other data
        # structures accordingly
        stamp = self._next_timestamp()
        dict.__setitem__(self, key, value)
        self._times_dict[key] = stamp
        # make sure we keep the list sorted
        bisect.insort_left(self._times_list, (stamp, key))

    def __delitem__(self, key):
        """Remove ``key`` from the cache.

        Raises KeyError if ``key`` has no recorded timestamp.
        """
        # we need to make sure we delete this key out of all three of
        # our data structures
        del self._times_list[self.timestamp(key)]
        del self._times_dict[key]
        dict.__delitem__(self, key)