Lucky_Dummy

Untitled

Dec 23rd, 2021 (edited)
189
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 10.22 KB | None | 0 0
  1. #include "Huffman.h"
  2. #include <cassert>
  3. #include <queue>
  4. #include <unordered_map>
  5. #include <vector>
  6. #include <climits>
  7.  
  8. typedef unsigned char byte;
  9. struct SymbolCode {
  10.     byte buffer;
  11.     byte bitsInBuffer;
  12.  
  13.     explicit SymbolCode(const byte &symbol) : buffer(symbol), bitsInBuffer(CHAR_BIT) {};
  14.     SymbolCode(const byte &buffer, const byte &bitsInBuffer) : buffer(buffer), bitsInBuffer(bitsInBuffer) {};
  15.     SymbolCode() : buffer(0), bitsInBuffer(0) {};
  16.  
  17.     friend SymbolCode operator+(const SymbolCode &code, const bool &bit) {
  18.         assert(code.bitsInBuffer != CHAR_BIT && "Try to increment full filled buffer");
  19.         SymbolCode temp = code;
  20.         temp.buffer |= (bit << (CHAR_BIT - 1 - code.bitsInBuffer));
  21.         temp.bitsInBuffer++;
  22.         return temp;
  23.     }
  24.     friend void operator+=(SymbolCode &code, const bool &bit) {
  25.         code = code + bit;
  26.     }
  27.  
  28.     friend bool operator==(const SymbolCode &first, const SymbolCode &second) {
  29.         if (first.buffer == second.buffer && first.bitsInBuffer == second.bitsInBuffer) {
  30.             return true;
  31.         } else {
  32.             return false;
  33.         }
  34.     }
  35. };
  36.  
  37. class BinaryReader {
  38. private:
  39.     SymbolCode code;
  40.     std::vector<byte>::iterator begin;
  41.     std::vector<byte>::iterator end;
  42.  
  43. public:
  44.     bool readBit(bool &bit);
  45.  
  46.     BinaryReader(const std::vector<byte>::iterator& begin, const std::vector<byte>::iterator& end)
  47.         : begin(begin), end(end) {};
  48. };
  49.  
  50. bool BinaryReader::readBit(bool &bit) {
  51.     if (begin == end && code.bitsInBuffer == 0) {
  52.         return false;
  53.     }
  54.  
  55.     if (code.bitsInBuffer == 0) {
  56.         code.buffer = *begin++;
  57.         if (begin + 1 == end) {
  58.             code.bitsInBuffer = *begin++;
  59.         } else {
  60.             code.bitsInBuffer = CHAR_BIT;
  61.         }
  62.     }
  63.     if (code.bitsInBuffer == 0) {
  64.         return false;
  65.     }
  66.     bit = code.buffer >> (CHAR_BIT - 1);
  67.     code.buffer <<= 1;
  68.     code.bitsInBuffer--;
  69.     return true;
  70. }
  71.  
  72. class BinaryWriter {
  73. private:
  74.     SymbolCode code;
  75.     std::vector<byte> &data;
  76.  
  77. public:
  78.     void writeSymbol(const SymbolCode &symbolCode);
  79.     void writeByte(const byte &value);
  80.     void writeBit(const bool &bit);
  81.     void writeBuffer();
  82.  
  83.     explicit BinaryWriter(std::vector<byte> &data) : data(data), code(SymbolCode(0,0)) {};
  84. };
  85.  
  86. void BinaryWriter::writeSymbol(const SymbolCode &symbolCode) {
  87.     if (symbolCode.bitsInBuffer == CHAR_BIT) {
  88.         writeByte(symbolCode.buffer);
  89.     } else {
  90.         for (auto i = 1; i <= symbolCode.bitsInBuffer; i++) {
  91.             writeBit((symbolCode.buffer >> (CHAR_BIT - i)) & 1);
  92.         }
  93.     }
  94. }
  95.  
  96. void BinaryWriter::writeByte(const byte &value) {
  97.     if (code.bitsInBuffer == 0) {
  98.         data.push_back(value);
  99.     } else {
  100.         assert(code.bitsInBuffer != CHAR_BIT && "BinaryWriter buffer was full");
  101.         byte temp = code.buffer | (value >> code.bitsInBuffer);
  102.         data.push_back(temp);
  103.         code.buffer = value << code.bitsInBuffer;
  104.         code.bitsInBuffer = CHAR_BIT - code.bitsInBuffer;
  105.     }
  106. }
  107.  
  108. void BinaryWriter::writeBit(const bool &bit) {
  109.     code += bit;
  110.     if (code.bitsInBuffer == CHAR_BIT) {
  111.         data.push_back(code.buffer);
  112.         code.buffer = code.bitsInBuffer = 0;
  113.     }
  114. }
  115.  
  116. void BinaryWriter::writeBuffer() {
  117.     data.push_back(code.buffer);
  118.     data.push_back(code.bitsInBuffer);
  119. }
  120.  
  121. class HuffmanCoder {
  122. private:
  123.     struct Node {
  124.         Node *left;
  125.         Node *right;
  126.         byte frequency;
  127.         byte symbol;
  128.  
  129.         explicit Node(const byte symbol) : symbol(symbol), frequency(0), left(nullptr), right(nullptr) {};
  130.         Node(const byte symbol, const byte frequency) : symbol(symbol), frequency(frequency), left(nullptr), right(nullptr) {};
  131.         Node(const byte frequency, Node *left, Node *right)
  132.                 : symbol('\0'), frequency(frequency), left(left), right(right) {};
  133.  
  134.         struct Comparator {
  135.             bool operator()(const Node *first, const Node *second) {
  136.                 return first->frequency > second->frequency;
  137.             }
  138.         };
  139.     };
  140.     Node *root;
  141.     std::unordered_map<byte, SymbolCode> symbolCodes;
  142.  
  143.     void buildTree(std::vector<byte> &data);
  144.     void deleteTree(Node* node);
  145.     byte getTreeSize(Node *node);
  146.     void writeTree(BinaryWriter &writer);
  147.     void writeNode(BinaryWriter &writer, Node *node);
  148.     void readTree(std::vector<byte> &data);
  149.     void addNodeToTree(const byte &symbol, Node *&node, bool &nodeAdded);
  150.     void createSymbolsCodes(Node *node, SymbolCode currentCode);
  151.     bool findSymbolByCode(std::pair<byte, SymbolCode> &pair);
  152.  
  153. public:
  154.     HuffmanCoder() : root(nullptr) {};
  155.     ~HuffmanCoder() {
  156.         deleteTree(root);
  157.     }
  158.  
  159.     std::vector<byte> encodeData(std::vector<byte> &data);
  160.     std::vector<byte> decodeData(std::vector<byte> &data);
  161. };
  162.  
  163. void HuffmanCoder::buildTree(std::vector<byte> &data) {
  164.     std::unordered_map<byte, int> frequencyMap;
  165.     for (const auto a : data) {
  166.         frequencyMap[a]++;
  167.     }
  168.  
  169.     std::priority_queue<Node*, std::vector<Node*>, Node::Comparator> queue;
  170.     for (const auto pair : frequencyMap) {
  171.         Node* tempNode = new Node(pair.first, pair.second);
  172.         queue.push(tempNode);
  173.     }
  174.  
  175.     if (queue.size() == 1) {
  176.         root = new Node('\0');
  177.         root->left = queue.top();
  178.     } else {
  179.         while (queue.size() != 1) {
  180.             Node *left = queue.top();
  181.             queue.pop();
  182.             Node *right = queue.top();
  183.             queue.pop();
  184.             Node *tempNode = new Node(left->frequency + right->frequency, left, right);
  185.             queue.push(tempNode);
  186.         }
  187.         root = queue.top();
  188.     }
  189. }
  190.  
  191. void HuffmanCoder::deleteTree(Node *node) {
  192.     if (node == nullptr) {
  193.         return;
  194.     }
  195.     deleteTree(node->left);
  196.     deleteTree(node->right);
  197.     delete node;
  198. }
  199.  
  200. byte HuffmanCoder::getTreeSize(Node *node) {
  201.     if (node == nullptr) {
  202.         return 0;
  203.     }
  204.     return getTreeSize(node->left) + getTreeSize(node->right) + 1;
  205. }
  206.  
  207. void HuffmanCoder::writeTree(BinaryWriter &writer) {
  208.     writer.writeByte(getTreeSize(root));
  209.     writeNode(writer, root);
  210. }
  211.  
  212. void HuffmanCoder::writeNode(BinaryWriter &writer, Node *node) {
  213.     if (node == nullptr) {
  214.         return;
  215.     }
  216.     writer.writeByte(node->symbol);
  217.     writeNode(writer, node->left);
  218.     writeNode(writer, node->right);
  219. }
  220.  
  221. void HuffmanCoder::readTree(std::vector<byte> &data) {
  222.     assert(!data.empty() && "Try to read Huffman tree from empty data");
  223.     byte treeSize = data.front();
  224.     for (auto i = 1; i <= treeSize; i++) {
  225.         bool flag = false;
  226.         addNodeToTree(data[i], root, flag);
  227.     }
  228. }
  229.  
  230. void HuffmanCoder::addNodeToTree(const byte &symbol, HuffmanCoder::Node *&node, bool &nodeAdded) {
  231.     if (nodeAdded || (node != nullptr && node->symbol != '\0')) {
  232.         return;
  233.     } else if (node == nullptr) {
  234.         node = new Node(symbol);
  235.         nodeAdded = true;
  236.         return;
  237.     } else {
  238.         addNodeToTree(symbol, node->left, nodeAdded);
  239.         addNodeToTree(symbol, node->right, nodeAdded);
  240.     }
  241. }
  242.  
  243. void HuffmanCoder::createSymbolsCodes(Node *node, SymbolCode currentCode) {
  244.     if (node == nullptr) {
  245.         return;
  246.     }
  247.     if (node->symbol != '\0') {
  248.         symbolCodes[node->symbol] = currentCode;
  249.     } else {
  250.         createSymbolsCodes(node->left, currentCode + false);
  251.         createSymbolsCodes(node->right, currentCode + true);
  252.     }
  253. }
  254.  
  255. bool HuffmanCoder::findSymbolByCode(std::pair<byte, SymbolCode> &pair) {
  256.     assert(pair.second.bitsInBuffer != 0 && "Try to find empty symbol in the tree");
  257.     Node *tempNode = root;
  258.     for (auto i = 1; i <= pair.second.bitsInBuffer; i++) {
  259.         if (pair.second.buffer >> (CHAR_BIT - i) & 1) {
  260.             tempNode = tempNode->right;
  261.         } else {
  262.             tempNode = tempNode->left;
  263.         }
  264.     }
  265.     if (tempNode->symbol != '\0') {
  266.         pair.first = tempNode->symbol;
  267.         return true;
  268.     }
  269.     return false;
  270. }
  271.  
  272. std::vector<byte> HuffmanCoder::encodeData(std::vector<byte> &data) {
  273.     buildTree(data);
  274.     createSymbolsCodes(root, SymbolCode(0, 0));
  275.     std::vector<byte> encodedData;
  276.     BinaryWriter writer(encodedData);
  277.     writeTree(writer);
  278.     for (const auto a : data) {
  279.         auto symbolCode = symbolCodes[a];
  280.         writer.writeSymbol(symbolCode);
  281.     }
  282.     writer.writeBuffer();
  283.     return encodedData;
  284. }
  285.  
  286. std::vector<byte> HuffmanCoder::decodeData(std::vector<byte> &data) {
  287.     readTree(data);
  288.     std::vector<byte> decodedData;
  289.     BinaryWriter writer(decodedData);
  290.     BinaryReader reader(data.begin() + getTreeSize(root) + 1, data.end());
  291.  
  292.     std::pair<byte, SymbolCode> symbol{'\0', SymbolCode()};
  293.     bool bit;
  294.     while (reader.readBit(bit)) {
  295.         symbol.second += bit;
  296.         if (findSymbolByCode(symbol)) {
  297.             writer.writeByte(symbol.first);
  298.             symbol = std::make_pair('\0', SymbolCode());
  299.         }
  300.     }
  301.     return decodedData;
  302. }
  303.  
  304. std::vector<byte> getDataFromStream(CInputStream &stream) {
  305.     std::vector<byte> data;
  306.     byte temp;
  307.     while(stream.Read(temp)) {
  308.         data.push_back(temp);
  309.     }
  310.     return data;
  311. }
  312.  
  313. void writeDataInStream(COutputStream &stream, std::vector<byte> &data) {
  314.     for (const auto a : data) {
  315.         stream.Write(a);
  316.     }
  317. }
  318.  
  319. void Encode(CInputStream& original, COutputStream& compressed) {
  320.     std::vector<byte> originalData = getDataFromStream(original);
  321.     HuffmanCoder huffmanCoder = HuffmanCoder();
  322.     std::vector<byte> compressedData = huffmanCoder.encodeData(originalData);
  323.     writeDataInStream(compressed, compressedData);
  324. }
  325.  
  326. void Decode(CInputStream& compressed, COutputStream& original) {
  327.     std::vector<byte> compressedData = getDataFromStream(compressed);
  328.     HuffmanCoder huffmanCoder = HuffmanCoder();
  329.     std::vector<byte> originalData = huffmanCoder.decodeData(compressedData);
  330.     writeDataInStream(original, originalData);
  331. }
  332.  
  333. int main() {
  334.     {
  335.         CInputStream inputStream("input.txt");
  336.         COutputStream outputStream("output.txt");
  337.         Encode(inputStream, outputStream);
  338.     }
  339.     {
  340.         CInputStream inputStream("output.txt");
  341.         COutputStream outputStream("input.txt");
  342.         Decode(inputStream, outputStream);
  343.     }
  344.  
  345.     return 0;
  346. }
Add Comment
Please, Sign In to add comment