Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include "Huffman.h"
- #include <cassert>
- #include <queue>
- #include <unordered_map>
- #include <vector>
- #include <climits>
- typedef unsigned char byte;
- struct SymbolCode {
- byte buffer;
- byte bitsInBuffer;
- explicit SymbolCode(const byte &symbol) : buffer(symbol), bitsInBuffer(CHAR_BIT) {};
- SymbolCode(const byte &buffer, const byte &bitsInBuffer) : buffer(buffer), bitsInBuffer(bitsInBuffer) {};
- SymbolCode() : buffer(0), bitsInBuffer(0) {};
- friend SymbolCode operator+(const SymbolCode &code, const bool &bit) {
- assert(code.bitsInBuffer != CHAR_BIT && "Try to increment full filled buffer");
- SymbolCode temp = code;
- temp.buffer |= (bit << (CHAR_BIT - 1 - code.bitsInBuffer));
- temp.bitsInBuffer++;
- return temp;
- }
- friend void operator+=(SymbolCode &code, const bool &bit) {
- code = code + bit;
- }
- friend bool operator==(const SymbolCode &first, const SymbolCode &second) {
- if (first.buffer == second.buffer && first.bitsInBuffer == second.bitsInBuffer) {
- return true;
- } else {
- return false;
- }
- }
- };
- class BinaryReader {
- private:
- SymbolCode code;
- std::vector<byte>::iterator begin;
- std::vector<byte>::iterator end;
- public:
- bool readBit(bool &bit);
- BinaryReader(const std::vector<byte>::iterator& begin, const std::vector<byte>::iterator& end)
- : begin(begin), end(end) {};
- };
- bool BinaryReader::readBit(bool &bit) {
- if (begin == end && code.bitsInBuffer == 0) {
- return false;
- }
- if (code.bitsInBuffer == 0) {
- code.buffer = *begin++;
- if (begin + 1 == end) {
- code.bitsInBuffer = *begin++;
- } else {
- code.bitsInBuffer = CHAR_BIT;
- }
- }
- if (code.bitsInBuffer == 0) {
- return false;
- }
- bit = code.buffer >> (CHAR_BIT - 1);
- code.buffer <<= 1;
- code.bitsInBuffer--;
- return true;
- }
- class BinaryWriter {
- private:
- SymbolCode code;
- std::vector<byte> &data;
- public:
- void writeSymbol(const SymbolCode &symbolCode);
- void writeByte(const byte &value);
- void writeBit(const bool &bit);
- void writeBuffer();
- explicit BinaryWriter(std::vector<byte> &data) : data(data), code(SymbolCode(0,0)) {};
- };
- void BinaryWriter::writeSymbol(const SymbolCode &symbolCode) {
- if (symbolCode.bitsInBuffer == CHAR_BIT) {
- writeByte(symbolCode.buffer);
- } else {
- for (auto i = 1; i <= symbolCode.bitsInBuffer; i++) {
- writeBit((symbolCode.buffer >> (CHAR_BIT - i)) & 1);
- }
- }
- }
- void BinaryWriter::writeByte(const byte &value) {
- if (code.bitsInBuffer == 0) {
- data.push_back(value);
- } else {
- assert(code.bitsInBuffer != CHAR_BIT && "BinaryWriter buffer was full");
- byte temp = code.buffer | (value >> code.bitsInBuffer);
- data.push_back(temp);
- code.buffer = value << code.bitsInBuffer;
- code.bitsInBuffer = CHAR_BIT - code.bitsInBuffer;
- }
- }
- void BinaryWriter::writeBit(const bool &bit) {
- code += bit;
- if (code.bitsInBuffer == CHAR_BIT) {
- data.push_back(code.buffer);
- code.buffer = code.bitsInBuffer = 0;
- }
- }
- void BinaryWriter::writeBuffer() {
- data.push_back(code.buffer);
- data.push_back(code.bitsInBuffer);
- }
- class HuffmanCoder {
- private:
- struct Node {
- Node *left;
- Node *right;
- byte frequency;
- byte symbol;
- explicit Node(const byte symbol) : symbol(symbol), frequency(0), left(nullptr), right(nullptr) {};
- Node(const byte symbol, const byte frequency) : symbol(symbol), frequency(frequency), left(nullptr), right(nullptr) {};
- Node(const byte frequency, Node *left, Node *right)
- : symbol('\0'), frequency(frequency), left(left), right(right) {};
- struct Comparator {
- bool operator()(const Node *first, const Node *second) {
- return first->frequency > second->frequency;
- }
- };
- };
- Node *root;
- std::unordered_map<byte, SymbolCode> symbolCodes;
- void buildTree(std::vector<byte> &data);
- void deleteTree(Node* node);
- byte getTreeSize(Node *node);
- void writeTree(BinaryWriter &writer);
- void writeNode(BinaryWriter &writer, Node *node);
- void readTree(std::vector<byte> &data);
- void addNodeToTree(const byte &symbol, Node *&node, bool &nodeAdded);
- void createSymbolsCodes(Node *node, SymbolCode currentCode);
- bool findSymbolByCode(std::pair<byte, SymbolCode> &pair);
- public:
- HuffmanCoder() : root(nullptr) {};
- ~HuffmanCoder() {
- deleteTree(root);
- }
- std::vector<byte> encodeData(std::vector<byte> &data);
- std::vector<byte> decodeData(std::vector<byte> &data);
- };
- void HuffmanCoder::buildTree(std::vector<byte> &data) {
- std::unordered_map<byte, int> frequencyMap;
- for (const auto a : data) {
- frequencyMap[a]++;
- }
- std::priority_queue<Node*, std::vector<Node*>, Node::Comparator> queue;
- for (const auto pair : frequencyMap) {
- Node* tempNode = new Node(pair.first, pair.second);
- queue.push(tempNode);
- }
- if (queue.size() == 1) {
- root = new Node('\0');
- root->left = queue.top();
- } else {
- while (queue.size() != 1) {
- Node *left = queue.top();
- queue.pop();
- Node *right = queue.top();
- queue.pop();
- Node *tempNode = new Node(left->frequency + right->frequency, left, right);
- queue.push(tempNode);
- }
- root = queue.top();
- }
- }
- void HuffmanCoder::deleteTree(Node *node) {
- if (node == nullptr) {
- return;
- }
- deleteTree(node->left);
- deleteTree(node->right);
- delete node;
- }
- byte HuffmanCoder::getTreeSize(Node *node) {
- if (node == nullptr) {
- return 0;
- }
- return getTreeSize(node->left) + getTreeSize(node->right) + 1;
- }
- void HuffmanCoder::writeTree(BinaryWriter &writer) {
- writer.writeByte(getTreeSize(root));
- writeNode(writer, root);
- }
- void HuffmanCoder::writeNode(BinaryWriter &writer, Node *node) {
- if (node == nullptr) {
- return;
- }
- writer.writeByte(node->symbol);
- writeNode(writer, node->left);
- writeNode(writer, node->right);
- }
- void HuffmanCoder::readTree(std::vector<byte> &data) {
- assert(!data.empty() && "Try to read Huffman tree from empty data");
- byte treeSize = data.front();
- for (auto i = 1; i <= treeSize; i++) {
- bool flag = false;
- addNodeToTree(data[i], root, flag);
- }
- }
- void HuffmanCoder::addNodeToTree(const byte &symbol, HuffmanCoder::Node *&node, bool &nodeAdded) {
- if (nodeAdded || (node != nullptr && node->symbol != '\0')) {
- return;
- } else if (node == nullptr) {
- node = new Node(symbol);
- nodeAdded = true;
- return;
- } else {
- addNodeToTree(symbol, node->left, nodeAdded);
- addNodeToTree(symbol, node->right, nodeAdded);
- }
- }
- void HuffmanCoder::createSymbolsCodes(Node *node, SymbolCode currentCode) {
- if (node == nullptr) {
- return;
- }
- if (node->symbol != '\0') {
- symbolCodes[node->symbol] = currentCode;
- } else {
- createSymbolsCodes(node->left, currentCode + false);
- createSymbolsCodes(node->right, currentCode + true);
- }
- }
- bool HuffmanCoder::findSymbolByCode(std::pair<byte, SymbolCode> &pair) {
- assert(pair.second.bitsInBuffer != 0 && "Try to find empty symbol in the tree");
- Node *tempNode = root;
- for (auto i = 1; i <= pair.second.bitsInBuffer; i++) {
- if (pair.second.buffer >> (CHAR_BIT - i) & 1) {
- tempNode = tempNode->right;
- } else {
- tempNode = tempNode->left;
- }
- }
- if (tempNode->symbol != '\0') {
- pair.first = tempNode->symbol;
- return true;
- }
- return false;
- }
- std::vector<byte> HuffmanCoder::encodeData(std::vector<byte> &data) {
- buildTree(data);
- createSymbolsCodes(root, SymbolCode(0, 0));
- std::vector<byte> encodedData;
- BinaryWriter writer(encodedData);
- writeTree(writer);
- for (const auto a : data) {
- auto symbolCode = symbolCodes[a];
- writer.writeSymbol(symbolCode);
- }
- writer.writeBuffer();
- return encodedData;
- }
- std::vector<byte> HuffmanCoder::decodeData(std::vector<byte> &data) {
- readTree(data);
- std::vector<byte> decodedData;
- BinaryWriter writer(decodedData);
- BinaryReader reader(data.begin() + getTreeSize(root) + 1, data.end());
- std::pair<byte, SymbolCode> symbol{'\0', SymbolCode()};
- bool bit;
- while (reader.readBit(bit)) {
- symbol.second += bit;
- if (findSymbolByCode(symbol)) {
- writer.writeByte(symbol.first);
- symbol = std::make_pair('\0', SymbolCode());
- }
- }
- return decodedData;
- }
- std::vector<byte> getDataFromStream(CInputStream &stream) {
- std::vector<byte> data;
- byte temp;
- while(stream.Read(temp)) {
- data.push_back(temp);
- }
- return data;
- }
- void writeDataInStream(COutputStream &stream, std::vector<byte> &data) {
- for (const auto a : data) {
- stream.Write(a);
- }
- }
- void Encode(CInputStream& original, COutputStream& compressed) {
- std::vector<byte> originalData = getDataFromStream(original);
- HuffmanCoder huffmanCoder = HuffmanCoder();
- std::vector<byte> compressedData = huffmanCoder.encodeData(originalData);
- writeDataInStream(compressed, compressedData);
- }
- void Decode(CInputStream& compressed, COutputStream& original) {
- std::vector<byte> compressedData = getDataFromStream(compressed);
- HuffmanCoder huffmanCoder = HuffmanCoder();
- std::vector<byte> originalData = huffmanCoder.decodeData(compressedData);
- writeDataInStream(original, originalData);
- }
- int main() {
- {
- CInputStream inputStream("input.txt");
- COutputStream outputStream("output.txt");
- Encode(inputStream, outputStream);
- }
- {
- CInputStream inputStream("output.txt");
- COutputStream outputStream("input.txt");
- Decode(inputStream, outputStream);
- }
- return 0;
- }
Add Comment
Please, Sign In to add comment