Advertisement
CSenshi

Information Theory - HW1 (tmp)

Oct 8th, 2019
403
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.62 KB | None | 0 0
  1. import sys
  2. import heapq as hp
  3.  
  4. ''' class for heap tree '''
  5.  
  6.  
  7. class Node:
  8.     def __init__(self, index, freq):
  9.         self.index = index
  10.         self.freq = freq
  11.         self.left = None
  12.         self.right = None
  13.  
  14.     def __lt__(self, other):
  15.         return self.freq < other.freq
  16.  
  17.  
  18. def rec(elem, len, res):
  19.     if elem.index != None:
  20.         res.append((elem.index, len))
  21.         return
  22.     rec(elem.left, len+1, res)
  23.     rec(elem.right, len+1, res)
  24.  
  25.  
  26. def huffman(arr):
  27.     # create array of tuples (frequency, index)
  28.     # [(0.009047, 6), (0.021247, 3), (0.041388, 7), (0.045914, 4), (0.047731, 5)...]
  29.     arr = [(arr[x], x) for x in range(len(arr))]
  30.     # print(sorted(arr, key=lambda x: x[0]))
  31.  
  32.     # create queue
  33.     pq = []
  34.     for key in arr:
  35.         node = Node(freq=key[0], index=key[1])
  36.         hp.heappush(pq, node)
  37.  
  38.     # create tree
  39.     while len(pq) > 1:
  40.         f = hp.heappop(pq)
  41.         s = hp.heappop(pq)
  42.  
  43.         merged = Node(freq=f.freq + s.freq, index=None)
  44.         merged.left = f
  45.         merged.right = s
  46.  
  47.         hp.heappush(pq, merged)
  48.  
  49.     # evaluate code lengths
  50.     res = []
  51.     rec(hp.heappop(pq), 0, res)
  52.     # print(sorted(res, key=lambda x: x[1]))
  53.     return res
  54.  
  55.  
  56. def craft_value(arr):
  57.     # arr = [(freq, len)]
  58.     sum = 0
  59.     for _, len in arr:
  60.         sum += 2**(0-len)
  61.     return sum
  62.  
  63.  
  64. def build_code(arr):
  65.     # Create tuple of (length, index, code)
  66.     for i in range(n):
  67.         arr[i] = (arr[i][1], arr[i][0], '')
  68.  
  69.     # sort by length of codes
  70.     arr = sorted(arr, key=lambda x: x[0])
  71.  
  72.     dlm = ''
  73.     for i in range(len(arr)):
  74.         tmp = list(arr[i])
  75.  
  76.         tmp[2] = dlm + '0' * (arr[i][0] - len(dlm))
  77.         dlm = "{0:b}".format((int(tmp[2], 2) + 1))
  78.         dlm = '0' * (len(tmp[2]) - len(dlm)) + dlm
  79.  
  80.         arr[i] = tmp
  81.  
  82.     # sort by length of codes
  83.     arr = sorted(arr, key=lambda x: x[1])
  84.  
  85.     return arr
  86.  
  87.  
  88. if __name__ == "__main__":
  89.     if len(sys.argv) != 3:
  90.         print("Usage : python3 Huffman.py src-fname dest-fname")
  91.         exit()
  92.  
  93.     src_fname = sys.argv[1]
  94.     dst_fname = sys.argv[2]
  95.  
  96.     with open(src_fname, 'r') as f:
  97.         n = int(f.readline())
  98.         arr = list(map(float, f.readline().split()))
  99.  
  100.     # find length of codes with probabilities
  101.     arr = huffman(arr)
  102.  
  103.     # calculate craft value
  104.     cv = craft_value(arr)
  105.     if cv > 1:
  106.         exit()
  107.  
  108.     # build code
  109.     arr = build_code(arr)
  110.  
  111.     # Write Result
  112.     with open(dst_fname, 'w+') as df:
  113.         # Craft Value
  114.         df.write('{}\n'.format(cv))
  115.  
  116.         for len, ind, code in arr:
  117.             df.write('{}\n'.format(code))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement