Advertisement
InTesting

HuffmanCompression Revision

Feb 20th, 2022
581
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Lua 5.93 KB | None | 0 0
  1. local HuffmanCompression = {}
  2.  
  3. -- some sample functions
  4. local function sortFrequencies(a, b)
  5.     return a.frequency < b.frequency
  6. end
  7. -- compress
  8. function HuffmanCompression:compress(stream, characterBase)
  9.     -- pre
  10.     assert(
  11.         type(stream) == 'table' and #stream > 0 and
  12.             type(characterBase) == 'table' and #characterBase > 1
  13.     )
  14.  
  15.     for i, v in next, stream do
  16.         assert(
  17.             type(i) == 'number' and
  18.                 i > 0 and
  19.                 v ~= nil
  20.         )
  21.     end
  22.  
  23.     for i, v in next, characterBase do
  24.         assert(
  25.             type(i) == 'number' and
  26.                 type(v) == 'string'
  27.         )
  28.     end
  29.  
  30.     -- main
  31.  
  32.     local resultDictionary = {}
  33.     local resultString = ''
  34.  
  35.     local valueFrequencyHashmap = {}
  36.     local tree = {}
  37.  
  38.  
  39.     -- populate freq hashmap
  40.     for _, v in next, stream do
  41.         valueFrequencyHashmap[v] = (valueFrequencyHashmap[v] or 0) + 1
  42.     end
  43.  
  44.     -- populate tree, so that it has multiple roots
  45.     for i, v in next, valueFrequencyHashmap do
  46.         table.insert(tree, {
  47.             value = i;
  48.             frequency = v;
  49.             children = nil;
  50.         })
  51.     end
  52.  
  53.     --[[ for debuging
  54.     --]]
  55.     local function printTree(t,l)
  56.         l = l or 1
  57.         local s = ('%s>%s'):format(
  58.             (' '):rep(l),
  59.             t.frequency
  60.         )
  61.        
  62.         if t.value then
  63.             s ..= '|' ..  tostring(t.value)
  64.         end
  65.        
  66.         print(s)
  67.  
  68.         if t.children then
  69.             for _, v in next, t.children do
  70.                 printTree(v, l + 1)
  71.             end
  72.         end
  73.     end
  74.  
  75.     --[[arrange tree so that the tree has multiple branches from one root
  76.         instead of having multiple roots each with no children
  77.     --]]
  78.     local function sortRoots(currentTree, limit)
  79.         limit = limit or 1
  80.         while #currentTree > limit do
  81.             -- sort first, so that the ones with least frequencies appear first
  82.             table.sort(currentTree, sortFrequencies)
  83.  
  84.             -- obtain the two least values
  85.             local rootA, rootB = unpack(currentTree, 1, 2)
  86.  
  87.             -- erase the roots from the tree
  88.             table.remove(currentTree, 2)
  89.  
  90.             --[[the way the tree is structured is that childless
  91.                 nodes (end nodes) are the value structs, and the other nodes
  92.                 (connecting nodes)
  93.                 with children are responsible for generating indexes.
  94.                
  95.                 Considering how we got two nodes, there's three situations
  96.                 that can happen, being the interactions with empty nodes and
  97.                 connecting nodes
  98.                
  99.                 First off, if we get two empty nodes, then the tree will replace
  100.                 first value with a connecting node, since the empty nodes dont have children
  101.                
  102.             --]]
  103.             local connectingNode
  104.  
  105.             if not rootA.children and not rootB.children then
  106.                 connectingNode = {
  107.                     frequency = rootA.frequency + rootB.frequency;
  108.                     children = {
  109.                         rootA,
  110.                         rootB
  111.                     }
  112.                 }
  113.             else
  114.                 --[[ considering how one of them has children, we can transfer a node or
  115.                     their children into the other node
  116.                 --]]
  117.                 connectingNode = rootA.children and rootA or rootB
  118.                 local otherNode = rootA.children and rootB or rootA
  119.                 local childNodes = otherNode.children and #otherNode.children or 1
  120.                 local connectingNodeChildren = connectingNode.children
  121.  
  122.                 -- insert nodes anyway
  123.                 if otherNode.children then
  124.                     for _, v in next, otherNode.children do
  125.                         table.insert(connectingNodeChildren, v)
  126.                     end
  127.                 else
  128.                     table.insert(connectingNodeChildren, otherNode)
  129.                 end
  130.  
  131.                 --[[considering the amount of nodes we can add is 1 or more,
  132.                     we should check if the amount of child nodes is sufficient
  133.                     to put in, if this condition is met, then transfer all of
  134.                     child nodes into the connecting node
  135.                 --]]
  136.                 --[[considering this alt  conditional, being how there isn't enough room,
  137.                      we should once again prioritize more frequent nodes with more
  138.                      frequencies
  139.                 --]]
  140.  
  141.                 if #connectingNodeChildren >= #characterBase then
  142.  
  143.                     sortRoots(connectingNodeChildren, #characterBase)
  144.                 end
  145.  
  146.                 -- update connecting node with new frequencies
  147.                 connectingNode.frequency = 0
  148.  
  149.                 for _, v in next, connectingNodeChildren do
  150.                     connectingNode.frequency += v.frequency
  151.                 end
  152.  
  153.             end
  154.  
  155.             currentTree[1] = connectingNode
  156.         end
  157.     end
  158.  
  159.     sortRoots(tree)
  160.  
  161.     -- construct string and dictionary at the same time but in the process
  162.     -- create a hashmap for getting the new suffix
  163.     -- however, we also need a function for actually getting the suffix so just
  164.     -- create a local function just in case
  165.     local function getSuffix(value, currentTree)
  166.         local newSuffix
  167.         currentTree = currentTree or tree
  168.  
  169.         for i, branch in next, currentTree do
  170.  
  171.             if branch.value == value then -- found branch
  172.                 newSuffix = characterBase[i]
  173.             elseif branch.children then --- check children
  174.                 local suffix = getSuffix(value, branch.children)
  175.  
  176.                 if suffix then
  177.                     newSuffix = characterBase[i] .. suffix
  178.                 end
  179.             end
  180.  
  181.  
  182.             if newSuffix then
  183.                 break
  184.             end
  185.         end
  186.  
  187.         return newSuffix
  188.     end
  189.  
  190.     local hashMap = {}
  191.     for _, v in next, stream do
  192.         local suffix = hashMap[v]
  193.  
  194.         if not suffix then
  195.             suffix = getSuffix(v):sub(2)
  196.             hashMap[v] = suffix
  197.             resultDictionary[suffix] = v
  198.         end
  199.  
  200.         resultString ..= suffix
  201.     end
  202.    
  203.         return resultDictionary, resultString
  204. end
  205.  
  206. function HuffmanCompression:decompress(dict, str)
  207.     -- pre
  208.     assert(type(dict) == 'table' and type(str) == 'string')
  209.  
  210.     for i, v in next, dict do
  211.         assert(type(i) == 'string')
  212.     end
  213.  
  214.     -- main
  215.     local result = {}
  216.     local keyStart = 1
  217.     local keyEnd = 0
  218.  
  219.     --[[Simplistic stuff, have two numbers responsible for setting the range
  220.         of a key, start stays but the end increments until a valid key from
  221.         the dictionary is met. in that case, move start after the end and insert
  222.         the value
  223.     --]]
  224.     while keyEnd <= #str do
  225.         keyEnd += 1
  226.  
  227.         local key = str:sub(keyStart, keyEnd)
  228.         local value = dict[key]
  229.  
  230.         if value then
  231.             table.insert(result, value)
  232.             keyStart = keyEnd + 1
  233.         end
  234.     end
  235.  
  236.     -- post
  237.     -- needed incase someone put a bad string
  238.     assert(
  239.         #str == keyStart - 1,
  240.         ('stray key met. \nkeystart: %s\nkeyend: %s\nkey: %s'):format(
  241.         keyStart,
  242.         keyEnd,
  243.         str:sub(keyStart)
  244.         )
  245.     )
  246.  
  247.     return result
  248. end
  249.  
  250. return HuffmanCompression
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement