Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- local HuffmanCompression = {}
- -- some sample functions
- local function sortFrequencies(a, b)
- return a.frequency < b.frequency
- end
- -- compress
- function HuffmanCompression:compress(stream, characterBase)
- -- pre
- assert(
- type(stream) == 'table' and #stream > 0 and
- type(characterBase) == 'table' and #characterBase > 1
- )
- for i, v in next, stream do
- assert(
- type(i) == 'number' and
- i > 0 and
- v ~= nil
- )
- end
- for i, v in next, characterBase do
- assert(
- type(i) == 'number' and
- type(v) == 'string'
- )
- end
- -- main
- local resultDictionary = {}
- local resultString = ''
- local valueFrequencyHashmap = {}
- local tree = {}
- -- populate freq hashmap
- for _, v in next, stream do
- valueFrequencyHashmap[v] = (valueFrequencyHashmap[v] or 0) + 1
- end
- -- populate tree, so that it has multiple roots
- for i, v in next, valueFrequencyHashmap do
- table.insert(tree, {
- value = i;
- frequency = v;
- children = nil;
- })
- end
- --[[ for debuging
- --]]
- local function printTree(t,l)
- l = l or 1
- local s = ('%s>%s'):format(
- (' '):rep(l),
- t.frequency
- )
- if t.value then
- s ..= '|' .. tostring(t.value)
- end
- print(s)
- if t.children then
- for _, v in next, t.children do
- printTree(v, l + 1)
- end
- end
- end
- --[[arrange tree so that the tree has multiple branches from one root
- instead of having multiple roots each with no children
- --]]
- local function sortRoots(currentTree, limit)
- limit = limit or 1
- while #currentTree > limit do
- -- sort first, so that the ones with least frequencies appear first
- table.sort(currentTree, sortFrequencies)
- -- obtain the two least values
- local rootA, rootB = unpack(currentTree, 1, 2)
- -- erase the roots from the tree
- table.remove(currentTree, 2)
- --[[the way the tree is structured is that childless
- nodes (end nodes) are the value structs, and the other nodes
- (connecting nodes)
- with children are responsible for generating indexes.
- Considering how we got two nodes, there's three situations
- that can happen, being the interactions with empty nodes and
- connecting nodes
- First off, if we get two empty nodes, then the tree will replace
- first value with a connecting node, since the empty nodes dont have children
- --]]
- local connectingNode
- if not rootA.children and not rootB.children then
- connectingNode = {
- frequency = rootA.frequency + rootB.frequency;
- children = {
- rootA,
- rootB
- }
- }
- else
- --[[ considering how one of them has children, we can transfer a node or
- their children into the other node
- --]]
- connectingNode = rootA.children and rootA or rootB
- local otherNode = rootA.children and rootB or rootA
- local childNodes = otherNode.children and #otherNode.children or 1
- local connectingNodeChildren = connectingNode.children
- -- insert nodes anyway
- if otherNode.children then
- for _, v in next, otherNode.children do
- table.insert(connectingNodeChildren, v)
- end
- else
- table.insert(connectingNodeChildren, otherNode)
- end
- --[[considering the amount of nodes we can add is 1 or more,
- we should check if the amount of child nodes is sufficient
- to put in, if this condition is met, then transfer all of
- child nodes into the connecting node
- --]]
- --[[considering this alt conditional, being how there isn't enough room,
- we should once again prioritize more frequent nodes with more
- frequencies
- --]]
- if #connectingNodeChildren >= #characterBase then
- sortRoots(connectingNodeChildren, #characterBase)
- end
- -- update connecting node with new frequencies
- connectingNode.frequency = 0
- for _, v in next, connectingNodeChildren do
- connectingNode.frequency += v.frequency
- end
- end
- currentTree[1] = connectingNode
- end
- end
- sortRoots(tree)
- -- construct string and dictionary at the same time but in the process
- -- create a hashmap for getting the new suffix
- -- however, we also need a function for actually getting the suffix so just
- -- create a local function just in case
- local function getSuffix(value, currentTree)
- local newSuffix
- currentTree = currentTree or tree
- for i, branch in next, currentTree do
- if branch.value == value then -- found branch
- newSuffix = characterBase[i]
- elseif branch.children then --- check children
- local suffix = getSuffix(value, branch.children)
- if suffix then
- newSuffix = characterBase[i] .. suffix
- end
- end
- if newSuffix then
- break
- end
- end
- return newSuffix
- end
- local hashMap = {}
- for _, v in next, stream do
- local suffix = hashMap[v]
- if not suffix then
- suffix = getSuffix(v):sub(2)
- hashMap[v] = suffix
- resultDictionary[suffix] = v
- end
- resultString ..= suffix
- end
- return resultDictionary, resultString
- end
- function HuffmanCompression:decompress(dict, str)
- -- pre
- assert(type(dict) == 'table' and type(str) == 'string')
- for i, v in next, dict do
- assert(type(i) == 'string')
- end
- -- main
- local result = {}
- local keyStart = 1
- local keyEnd = 0
- --[[Simplistic stuff, have two numbers responsible for setting the range
- of a key, start stays but the end increments until a valid key from
- the dictionary is met. in that case, move start after the end and insert
- the value
- --]]
- while keyEnd <= #str do
- keyEnd += 1
- local key = str:sub(keyStart, keyEnd)
- local value = dict[key]
- if value then
- table.insert(result, value)
- keyStart = keyEnd + 1
- end
- end
- -- post
- -- needed incase someone put a bad string
- assert(
- #str == keyStart - 1,
- ('stray key met. \nkeystart: %s\nkeyend: %s\nkey: %s'):format(
- keyStart,
- keyEnd,
- str:sub(keyStart)
- )
- )
- return result
- end
- return HuffmanCompression
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement