Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import array
- import collections
- import itertools
- # Placeholder constants
- FREE = -1
- DUMMY = -2
- class Dict(collections.MutableMapping):
- 'Space efficient dictionary with fast iteration and cheap resizes.'
- def _lookup(self, key, hashvalue):
- 'Same lookup logic as currently used in real dicts'
- assert self.filled < len(self.indices) # At least one open slot
- freeslot = None
- PERTURB_SHIFT = 5
- if hashvalue < 0:
- perturb = -hashvalue
- else:
- perturb = hashvalue
- n = len(self.indices)
- i = perturb & (n-1)
- while True:
- index = self.indices[i]
- if index == FREE:
- return (FREE, i) if freeslot is None else (DUMMY, freeslot)
- elif index == DUMMY:
- freeslot = i
- elif (self.keylist[index] is key or
- self.hashlist[index] == hashvalue
- and self.keylist[index] == key):
- return (index, i)
- i = 5 * i + perturb + 1
- i = i & (n-1)
- perturb >>= PERTURB_SHIFT
- @staticmethod
- def _make_index(n):
- 'New sequence of indices using the smallest possible datatype'
- if n <= 2**7: return array.array('b', [FREE]) * n # signed char
- if n <= 2**15: return array.array('h', [FREE]) * n # signed short
- if n <= 2**31: return array.array('l', [FREE]) * n # signed long
- return [FREE] * n # python integers
- def _resize(self, n):
- '''Reindex the existing hash/key/value entries.
- Entries do not get moved, they only get new indices.
- No calls are made to hash() or __eq__().
- '''
- n = 2 ** n.bit_length() # round-up to power-of-two
- self.indices = self._make_index(n)
- PERTURB_SHIFT = 5
- for index, hashvalue in enumerate(self.hashlist):
- if hashvalue < 0:
- perturb = -hashvalue
- else:
- perturb = hashvalue
- i = hashvalue & (n-1)
- while True:
- if self.indices[i] == FREE:
- break
- i = 5 * i + perturb + 1
- i = i & (n-1)
- perturb >>= PERTURB_SHIFT
- self.indices[i] = index
- self.filled = self.used
- def clear(self):
- self.indices = self._make_index(8)
- self.hashlist = []
- self.keylist = []
- self.valuelist = []
- self.used = 0
- self.filled = 0 # used + dummies
- def __init__(self, *args, **kwds):
- try:
- self.keylist
- except AttributeError:
- self.clear()
- self.update(*args, **kwds)
- def __getitem__(self, key):
- hashvalue = hash(key)
- index, i = self._lookup(key, hashvalue)
- if index < 0:
- raise KeyError(key)
- return self.valuelist[index]
- def __setitem__(self, key, value):
- hashvalue = hash(key)
- index, i = self._lookup(key, hashvalue)
- if index < 0:
- self.indices[i] = self.used
- self.hashlist.append(hashvalue)
- self.keylist.append(key)
- self.valuelist.append(value)
- self.used += 1
- if index == FREE:
- self.filled += 1
- if self.filled * 3 > len(self.indices) * 2:
- self._resize(4 * len(self))
- else:
- self.valuelist[index] = value
- def __delitem__(self, key):
- hashvalue = hash(key)
- index, i = self._lookup(key, hashvalue)
- if index < 0:
- raise KeyError(key)
- self.indices[i] = DUMMY
- self.used -= 1
- # If needed, swap with the lastmost entry to avoid leaving a "hole"
- if index != self.used:
- lasthash = self.hashlist[-1]
- lastkey = self.keylist[-1]
- lastvalue = self.valuelist[-1]
- lastindex, j = self._lookup(lastkey, lasthash)
- assert lastindex >= 0 and i != j
- self.indices[j] = index
- self.hashlist[index] = lasthash
- self.keylist[index] = lastkey
- self.valuelist[index] = lastvalue
- # Remove the lastmost entry
- self.hashlist.pop()
- self.keylist.pop()
- self.valuelist.pop()
- def __len__(self):
- return self.used
- def __iter__(self):
- return iter(self.keylist)
- def iterkeys(self):
- return iter(self.keylist)
- def keys(self):
- return list(self.keylist)
- def itervalues(self):
- return iter(self.valuelist)
- def values(self):
- return list(self.valuelist)
- def iteritems(self):
- return itertools.izip(self.keylist, self.valuelist)
- def items(self):
- return zip(self.keylist, self.valuelist)
- def __contains__(self, key):
- index, i = self._lookup(key, hash(key))
- return index >= 0
- def get(self, key, default=None):
- 'D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None.'
- index, i = self._lookup(key, hash(key))
- return self.valuelist[index] if index >= 0 else default
- def popitem(self):
- ''' D.popitem() -> (k, v), remove and return some (key, value) pair as a
- 2-tuple; but raise KeyError if D is empty.
- '''
- try:
- key = self.keylist[-1]
- value = self.valuelist[-1]
- except IndexError:
- raise KeyError( 'popitem(): dictionary is empty')
- del self[key]
- return key, value
- def __repr__(self):
- return 'Dict(%r)' % self.items()
- def show_structure(self):
- 'Diagnostic method. Not part of the API.'
- print '=' * 50
- print self
- print 'Indices:', self.indices
- for i, row in enumerate(zip(self.hashlist, self.keylist, self.valuelist)):
- print i, row
- print '-' * 50
- if __name__ == '__main__':
- import sys
- def f():
- if len(sys.argv) > 1:
- d = {}
- else:
- d = Dict()
- class A(object):
- pass
- for i in range(10000000):
- d[i] = A()
- f()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement