Advertisement
opexxx

entropy.py

Jul 17th, 2013
185
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.01 KB | None | 0 0
  1. #!/usr/bin/python
  2. from math import log
  3.  
  4. class Entropy:
  5.     def __init__(self):
  6.         self.frequence = dict( (chr(index), 0) for index in xrange(0, 256) )
  7.         self.count = 0
  8.  
  9.     def readBytes(self, bytes):
  10.         for byte in bytes:
  11.             self.frequence[byte] = self.frequence[byte] + 1
  12.         self.count += len(bytes)
  13.         return self
  14.  
  15.     def compute(self):
  16.         h = 0
  17.         for value in self.frequence.itervalues():
  18.             if not value:
  19.                 continue
  20.             p_i = float(value) / self.count
  21.             h -= p_i * log(p_i, 2)
  22.         return h
  23.  
  24. from time import time
  25. from sys import stderr
  26.  
  27. class EntropyFile(Entropy):
  28.     def __init__(self):
  29.         Entropy.__init__(self)
  30.         self.progress_time = 1.0
  31.         self.buffer_size = 4096
  32.  
  33.     def displayProgress(self, percent):
  34.         print >>stderr, "Progress: %.1f%%" % percent
  35.  
  36.     def readStream(self, stream, streamsize=None):
  37.         # Read stream size
  38.         if streamsize is None:
  39.             stream.seek(0, 2)
  40.             streamsize = stream.tell()
  41.         if streamsize <= 0:
  42.             raise ValueError("Empty stream")
  43.  
  44.         # Read stream content
  45.         stream.seek(0,0)
  46.         next_msg = time() + self.progress_time
  47.         while True:
  48.             if next_msg <= time():
  49.                 self.displayProgress(stream.tell() * 100.0 / streamsize)
  50.                 next_msg = time() + self.progress_time
  51.             raw = stream.read(self.buffer_size)
  52.             if not raw:
  53.                 break
  54.             self.readBytes(raw)
  55.         return self
  56.  
  57.     def readFile(self, filename):
  58.         stream = open(filename, 'rb')
  59.         self.readStream(stream)
  60.         return self
  61.  
  62. def main():
  63.     from sys import argv, exit
  64.     if len(argv) != 2:
  65.         print >>stderr, "usage: %s filename" % argv[0]
  66.         exit(1)
  67.     entropy = EntropyFile()
  68.     entropy.readFile(argv[1])
  69.     print "Entropy: %.4f bit/byte" % entropy.compute()
  70.     exit(0)
  71.  
  72. if __name__ == "__main__":
  73.     main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement