Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # 1. Node class: each node in the huffman tree is represented by a Node class . A node can be eather a leaf(containg a symbol)
- # or an internal node with left and right children
- # 2. Building a huffman tree: Here we use a min heap to build a huffman tree, we repeatedly combine the two nodes with the smallest
- # smallest frequency unitil only one node remains, which is the root of the huffman tree.
- # 3. Generating codes: Starting form the root, we assign "0" for left branches and "1" for right branches, building a binary code for each symbol.
- # these codes are stored in a dictionary called codelook.
- # 4. Encoding and decoding: we encode the data by replacing each symbol with its binary code from the codebook.
- # decoding involves traversing the huffman tree according to each bit in the encoded data to tetrive the original symbols.
- from collections import Counter
- import heapq
- class Node:
- def __init__(self, symbol=None, freq=0, left=None, right=None):
- self.freq = freq
- self.symbol = symbol
- self.left = left
- self.right = right
- def __lt__(self, other):
- return self.freq < other.freq
- def build_huffman_tree(data):
- # Count the frequency of each symbol in the data
- frequency = Counter(data)
- # Create a priority queue (min-heap) for the nodes
- heap = [Node(symbol, freq) for symbol, freq in frequency.items()]
- heapq.heapify(heap)
- # Build the Huffman tree
- while len(heap) > 1:
- left = heapq.heappop(heap)
- right = heapq.heappop(heap)
- merged = Node(freq=left.freq + right.freq, left=left, right=right)
- heapq.heappush(heap, merged)
- # The remaining node is the root of the tree
- return heap[0]
- def generate_huffman_codes(node, prefix="", codelook={}):
- if node.symbol is not None:
- codelook[node.symbol] = prefix
- else:
- # Traverse left (append '0') and right (append '1')
- generate_huffman_codes(node.left, prefix + '0', codelook)
- generate_huffman_codes(node.right, prefix + '1', codelook)
- return codelook
- def huffman_encoding(data, codelook):
- # Generate the encoded string based on the Huffman codes
- return ''.join(codelook[symbol] for symbol in data)
- def huffman_decoding(encoded_data, root):
- # Decode the encoded data back to the original string
- decoded_data = []
- node = root
- for bit in encoded_data:
- node = node.left if bit == '0' else node.right
- if node.symbol is not None:
- decoded_data.append(node.symbol)
- node = root
- return ''.join(decoded_data)
- # Test with sample data
- data = "We are the student of amity"
- root = build_huffman_tree(data)
- codelook = generate_huffman_codes(root)
- encoded_data = huffman_encoding(data, codelook)
- decoded_data = huffman_decoding(encoded_data, root)
- print("Encoded Data:", encoded_data)
- print("Decoded Data:", decoded_data)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement