Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- --[[
- LUA MODULE
- compress.deflatelua - deflate (and gunzip/zlib) implemented in Lua.
- SYNOPSIS
- local DEFLATE = require 'compress.deflatelua'
- -- uncompress gzip file
- local fh = assert(io.open'foo.txt.gz', 'rb')
- local ofh = assert(io.open'foo.txt', 'wb')
- DEFLATE.gunzip {input=fh, output=ofh}
- fh:close(); ofh:close()
- -- can also uncompress from string including zlib and raw DEFLATE formats.
- DESCRIPTION
- This is a pure Lua implementation of decompressing the DEFLATE format,
- including the related zlib and gzip formats.
- Note: This library only supports decompression.
- Compression is not currently implemented.
- API
- Note: in the following functions, input stream `fh` may be
- a file handle, string, or an iterator function that returns strings.
- Output stream `ofh` may be a file handle or a function that
- consumes one byte (number 0..255) per call.
- DEFLATE.inflate {input=fh, output=ofh}
- Decompresses input stream `fh` in the DEFLATE format
- while writing to output stream `ofh`.
- DEFLATE is detailed in http://tools.ietf.org/html/rfc1951 .
- DEFLATE.gunzip {input=fh, output=ofh, disable_crc=disable_crc}
- Decompresses input stream `fh` with the gzip format
- while writing to output stream `ofh`.
- `disable_crc` (defaults to `false`) will disable CRC-32 checking
- to increase speed.
- gzip is detailed in http://tools.ietf.org/html/rfc1952 .
- DEFLATE.inflate_zlib {input=fh, output=ofh, disable_crc=disable_crc}
- Decompresses input stream `fh` with the zlib format
- while writing to output stream `ofh`.
- `disable_crc` (defaults to `false`) will disable CRC-32 checking
- to increase speed.
- zlib is detailed in http://tools.ietf.org/html/rfc1950 .
- DEFLATE.adler32(byte, crc) --> rcrc
- Returns adler32 checksum of byte `byte` (number 0..255) appended
- to string with adler32 checksum `crc`. This is internally used by
- `inflate_zlib`.
- ADLER32 in detailed in http://tools.ietf.org/html/rfc1950 .
- COMMAND LINE UTILITY
- A `gunziplua` command line utility (in folder `bin`) is also provided.
- This mimicks the *nix `gunzip` utility but is a pure Lua implementation
- that invokes this library. For help do
- gunziplua -h
- DEPENDENCIES
- Requires 'digest.crc32lua' (used for optional CRC-32 checksum checks).
- https://github.com/davidm/lua-digest-crc32lua
- Will use a bit library ('bit', 'bit32', 'bit.numberlua') if available. This
- is not that critical for this library but is required by digest.crc32lua.
- 'pythonic.optparse' is only required by the optional `gunziplua`
- command-line utilty for command line parsing.
- https://github.com/davidm/lua-pythonic-optparse
- INSTALLATION
- Copy the `compress` directory into your LUA_PATH.
- REFERENCES
- [1] DEFLATE Compressed Data Format Specification version 1.3
- http://tools.ietf.org/html/rfc1951
- [2] GZIP file format specification version 4.3
- http://tools.ietf.org/html/rfc1952
- [3] http://en.wikipedia.org/wiki/DEFLATE
- [4] pyflate, by Paul Sladen
- http://www.paul.sladen.org/projects/pyflate/
- [5] Compress::Zlib::Perl - partial pure Perl implementation of
- Compress::Zlib
- http://search.cpan.org/~nwclark/Compress-Zlib-Perl/Perl.pm
- LICENSE
- (c) 2008-2011 David Manura. Licensed under the same terms as Lua (MIT).
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- THE SOFTWARE.
- (end license)
- --]]
- local M = {_TYPE='module', _NAME='compress.deflatelua', _VERSION='0.3.20111128'}
- local assert = assert
- local error = error
- local ipairs = ipairs
- local pairs = pairs
- local print = print
- local require = require
- local tostring = tostring
- local type = type
- local setmetatable = setmetatable
- local io = io
- local math = math
- local table_sort = table.sort
- local math_max = math.max
- local string_char = string.char
- --[[
- Requires the first module listed that exists, else raises like `require`.
- If a non-string is encountered, it is returned.
- Second return value is module name loaded (or '').
- --]]
- local function requireany(...)
- local errs = {}
- for i = 1, select('#', ...) do local name = select(i, ...)
- if type(name) ~= 'string' then return name, '' end
- local ok, mod = pcall(require, name)
- if ok then return mod, name end
- errs[#errs+1] = mod
- end
- error(table.concat(errs, '\n'), 2)
- end
- local crc32 = require "digest.crc32lua" . crc32_byte
- local bit, name_ = requireany('bit', 'bit32', 'bit.numberlua', nil)
- local DEBUG = false
- -- Whether to use `bit` library functions in current module.
- -- Unlike the crc32 library, it doesn't make much difference in this module.
- local NATIVE_BITOPS = (bit ~= nil)
- local band, lshift, rshift
- if NATIVE_BITOPS then
- band = bit.band
- lshift = bit.lshift
- rshift = bit.rshift
- end
- local function warn(s)
- io.stderr:write(s, '\n')
- end
- local function debug(...)
- print('DEBUG', ...)
- end
- local function runtime_error(s, level)
- level = level or 1
- error({s}, level+1)
- end
- local function make_outstate(outbs)
- local outstate = {}
- outstate.outbs = outbs
- outstate.window = {}
- outstate.window_pos = 1
- return outstate
- end
- local function output(outstate, byte)
- -- debug('OUTPUT:', s)
- local window_pos = outstate.window_pos
- outstate.outbs(byte)
- outstate.window[window_pos] = byte
- outstate.window_pos = window_pos % 32768 + 1 -- 32K
- end
- local function noeof(val)
- return assert(val, 'unexpected end of file')
- end
- local function hasbit(bits, bit)
- return bits % (bit + bit) >= bit
- end
- local function memoize(f)
- local mt = {}
- local t = setmetatable({}, mt)
- function mt:__index(k)
- local v = f(k)
- t[k] = v
- return v
- end
- return t
- end
- -- small optimization (lookup table for powers of 2)
- local pow2 = memoize(function(n) return 2^n end)
- --local tbits = memoize(
- -- function(bits)
- -- return memoize( function(bit) return getbit(bits, bit) end )
- -- end )
- -- weak metatable marking objects as bitstream type
- local is_bitstream = setmetatable({}, {__mode='k'})
- -- DEBUG
- -- prints LSB first
- --[[
- local function bits_tostring(bits, nbits)
- local s = ''
- local tmp = bits
- local function f()
- local b = tmp % 2 == 1 and 1 or 0
- s = s .. b
- tmp = (tmp - b) / 2
- end
- if nbits then
- for i=1,nbits do f() end
- else
- while tmp ~= 0 do f() end
- end
- return s
- end
- --]]
- local function bytestream_from_file(fh)
- local o = {}
- function o:read()
- local sb = fh:read(1)
- if sb then return sb:byte() end
- end
- return o
- end
- local function bytestream_from_string(s)
- local i = 1
- local o = {}
- function o:read()
- local by
- if i <= #s then
- by = s:byte(i)
- i = i + 1
- end
- return by
- end
- return o
- end
- local function bytestream_from_function(f)
- local i = 0
- local buffer = ''
- local o = {}
- function o:read()
- i = i + 1
- if i > #buffer then
- buffer = f()
- if not buffer then return end
- i = 1
- end
- return buffer:byte(i,i)
- end
- return o
- end
- local function bitstream_from_bytestream(bys)
- local buf_byte = 0
- local buf_nbit = 0
- local o = {}
- function o:nbits_left_in_byte()
- return buf_nbit
- end
- if NATIVE_BITOPS then
- function o:read(nbits)
- nbits = nbits or 1
- while buf_nbit < nbits do
- local byte = bys:read()
- if not byte then return end -- note: more calls also return nil
- buf_byte = buf_byte + lshift(byte, buf_nbit)
- buf_nbit = buf_nbit + 8
- end
- local bits
- if nbits == 0 then
- bits = 0
- elseif nbits == 32 then
- bits = buf_byte
- buf_byte = 0
- else
- bits = band(buf_byte, rshift(0xffffffff, 32 - nbits))
- buf_byte = rshift(buf_byte, nbits)
- end
- buf_nbit = buf_nbit - nbits
- return bits
- end
- else
- function o:read(nbits)
- nbits = nbits or 1
- while buf_nbit < nbits do
- local byte = bys:read()
- if not byte then return end -- note: more calls also return nil
- buf_byte = buf_byte + pow2[buf_nbit] * byte
- buf_nbit = buf_nbit + 8
- end
- local m = pow2[nbits]
- local bits = buf_byte % m
- buf_byte = (buf_byte - bits) / m
- buf_nbit = buf_nbit - nbits
- return bits
- end
- end
- is_bitstream[o] = true
- return o
- end
- local function get_bitstream(o)
- local bs
- if is_bitstream[o] then
- return o
- elseif io.type(o) == 'file' then
- bs = bitstream_from_bytestream(bytestream_from_file(o))
- elseif type(o) == 'string' then
- bs = bitstream_from_bytestream(bytestream_from_string(o))
- elseif type(o) == 'function' then
- bs = bitstream_from_bytestream(bytestream_from_function(o))
- else
- runtime_error 'unrecognized type'
- end
- return bs
- end
- local function get_obytestream(o)
- local bs
- if io.type(o) == 'file' then
- bs = function(sbyte) o:write(string_char(sbyte)) end
- elseif type(o) == 'function' then
- bs = o
- else
- runtime_error('unrecognized type: ' .. tostring(o))
- end
- return bs
- end
- local function HuffmanTable(init, is_full)
- local t = {}
- if is_full then
- for val,nbits in pairs(init) do
- if nbits ~= 0 then
- t[#t+1] = {val=val, nbits=nbits}
- --debug('*',val,nbits)
- end
- end
- else
- for i=1,#init-2,2 do
- local firstval, nbits, nextval = init[i], init[i+1], init[i+2]
- --debug(val, nextval, nbits)
- if nbits ~= 0 then
- for val=firstval,nextval-1 do
- t[#t+1] = {val=val, nbits=nbits}
- end
- end
- end
- end
- table_sort(t, function(a,b)
- return a.nbits == b.nbits and a.val < b.val or a.nbits < b.nbits
- end)
- -- assign codes
- local code = 1 -- leading 1 marker
- local nbits = 0
- for i,s in ipairs(t) do
- if s.nbits ~= nbits then
- code = code * pow2[s.nbits - nbits]
- nbits = s.nbits
- end
- s.code = code
- --debug('huffman code:', i, s.nbits, s.val, code, bits_tostring(code))
- code = code + 1
- end
- local minbits = math.huge
- local look = {}
- for i,s in ipairs(t) do
- minbits = math.min(minbits, s.nbits)
- look[s.code] = s.val
- end
- --for _,o in ipairs(t) do
- -- debug(':', o.nbits, o.val)
- --end
- -- function t:lookup(bits) return look[bits] end
- local msb = NATIVE_BITOPS and function(bits, nbits)
- local res = 0
- for i=1,nbits do
- res = lshift(res, 1) + band(bits, 1)
- bits = rshift(bits, 1)
- end
- return res
- end or function(bits, nbits)
- local res = 0
- for i=1,nbits do
- local b = bits % 2
- bits = (bits - b) / 2
- res = res * 2 + b
- end
- return res
- end
- local tfirstcode = memoize(
- function(bits) return pow2[minbits] + msb(bits, minbits) end)
- function t:read(bs)
- local code = 1 -- leading 1 marker
- local nbits = 0
- while 1 do
- if nbits == 0 then -- small optimization (optional)
- code = tfirstcode[noeof(bs:read(minbits))]
- nbits = nbits + minbits
- else
- local b = noeof(bs:read())
- nbits = nbits + 1
- code = code * 2 + b -- MSB first
- --[[NATIVE_BITOPS
- code = lshift(code, 1) + b -- MSB first
- --]]
- end
- --debug('code?', code, bits_tostring(code))
- local val = look[code]
- if val then
- --debug('FOUND', val)
- return val
- end
- end
- end
- return t
- end
- local function parse_gzip_header(bs)
- -- local FLG_FTEXT = 2^0
- local FLG_FHCRC = 2^1
- local FLG_FEXTRA = 2^2
- local FLG_FNAME = 2^3
- local FLG_FCOMMENT = 2^4
- local id1 = bs:read(8)
- local id2 = bs:read(8)
- if id1 ~= 31 or id2 ~= 139 then
- runtime_error 'not in gzip format'
- end
- local cm = bs:read(8) -- compression method
- local flg = bs:read(8) -- FLaGs
- local mtime = bs:read(32) -- Modification TIME
- local xfl = bs:read(8) -- eXtra FLags
- local os = bs:read(8) -- Operating System
- if DEBUG then
- debug("CM=", cm)
- debug("FLG=", flg)
- debug("MTIME=", mtime)
- -- debug("MTIME_str=",os.date("%Y-%m-%d %H:%M:%S",mtime)) -- non-portable
- debug("XFL=", xfl)
- debug("OS=", os)
- end
- if not os then runtime_error 'invalid header' end
- if hasbit(flg, FLG_FEXTRA) then
- local xlen = bs:read(16)
- local extra = 0
- for i=1,xlen do
- extra = bs:read(8)
- end
- if not extra then runtime_error 'invalid header' end
- end
- local function parse_zstring(bs)
- repeat
- local by = bs:read(8)
- if not by then runtime_error 'invalid header' end
- until by == 0
- end
- if hasbit(flg, FLG_FNAME) then
- parse_zstring(bs)
- end
- if hasbit(flg, FLG_FCOMMENT) then
- parse_zstring(bs)
- end
- if hasbit(flg, FLG_FHCRC) then
- local crc16 = bs:read(16)
- if not crc16 then runtime_error 'invalid header' end
- -- IMPROVE: check CRC. where is an example .gz file that
- -- has this set?
- if DEBUG then
- debug("CRC16=", crc16)
- end
- end
- end
- local function parse_zlib_header(bs)
- local cm = bs:read(4) -- Compression Method
- local cinfo = bs:read(4) -- Compression info
- local fcheck = bs:read(5) -- FLaGs: FCHECK (check bits for CMF and FLG)
- local fdict = bs:read(1) -- FLaGs: FDICT (present dictionary)
- local flevel = bs:read(2) -- FLaGs: FLEVEL (compression level)
- local cmf = cinfo * 16 + cm -- CMF (Compresion Method and flags)
- local flg = fcheck + fdict * 32 + flevel * 64 -- FLaGs
- if cm ~= 8 then -- not "deflate"
- runtime_error("unrecognized zlib compression method: " + cm)
- end
- if cinfo > 7 then
- runtime_error("invalid zlib window size: cinfo=" + cinfo)
- end
- local window_size = 2^(cinfo + 8)
- if (cmf*256 + flg) % 31 ~= 0 then
- runtime_error("invalid zlib header (bad fcheck sum)")
- end
- if fdict == 1 then
- runtime_error("FIX:TODO - FDICT not currently implemented")
- local dictid_ = bs:read(32)
- end
- return window_size
- end
- local function parse_huffmantables(bs)
- local hlit = bs:read(5) -- # of literal/length codes - 257
- local hdist = bs:read(5) -- # of distance codes - 1
- local hclen = noeof(bs:read(4)) -- # of code length codes - 4
- local ncodelen_codes = hclen + 4
- local codelen_init = {}
- local codelen_vals = {
- 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
- for i=1,ncodelen_codes do
- local nbits = bs:read(3)
- local val = codelen_vals[i]
- codelen_init[val] = nbits
- end
- local codelentable = HuffmanTable(codelen_init, true)
- local function decode(ncodes)
- local init = {}
- local nbits
- local val = 0
- while val < ncodes do
- local codelen = codelentable:read(bs)
- --FIX:check nil?
- local nrepeat
- if codelen <= 15 then
- nrepeat = 1
- nbits = codelen
- --debug('w', nbits)
- elseif codelen == 16 then
- nrepeat = 3 + noeof(bs:read(2))
- -- nbits unchanged
- elseif codelen == 17 then
- nrepeat = 3 + noeof(bs:read(3))
- nbits = 0
- elseif codelen == 18 then
- nrepeat = 11 + noeof(bs:read(7))
- nbits = 0
- else
- error 'ASSERT'
- end
- for i=1,nrepeat do
- init[val] = nbits
- val = val + 1
- end
- end
- local huffmantable = HuffmanTable(init, true)
- return huffmantable
- end
- local nlit_codes = hlit + 257
- local ndist_codes = hdist + 1
- local littable = decode(nlit_codes)
- local disttable = decode(ndist_codes)
- return littable, disttable
- end
- local tdecode_len_base
- local tdecode_len_nextrabits
- local tdecode_dist_base
- local tdecode_dist_nextrabits
- local function parse_compressed_item(bs, outstate, littable, disttable)
- local val = littable:read(bs)
- --debug(val, val < 256 and string_char(val))
- if val < 256 then -- literal
- output(outstate, val)
- elseif val == 256 then -- end of block
- return true
- else
- if not tdecode_len_base then
- local t = {[257]=3}
- local skip = 1
- for i=258,285,4 do
- for j=i,i+3 do t[j] = t[j-1] + skip end
- if i ~= 258 then skip = skip * 2 end
- end
- t[285] = 258
- tdecode_len_base = t
- --for i=257,285 do debug('T1',i,t[i]) end
- end
- if not tdecode_len_nextrabits then
- local t = {}
- if NATIVE_BITOPS then
- for i=257,285 do
- local j = math_max(i - 261, 0)
- t[i] = rshift(j, 2)
- end
- else
- for i=257,285 do
- local j = math_max(i - 261, 0)
- t[i] = (j - (j % 4)) / 4
- end
- end
- t[285] = 0
- tdecode_len_nextrabits = t
- --for i=257,285 do debug('T2',i,t[i]) end
- end
- local len_base = tdecode_len_base[val]
- local nextrabits = tdecode_len_nextrabits[val]
- local extrabits = bs:read(nextrabits)
- local len = len_base + extrabits
- if not tdecode_dist_base then
- local t = {[0]=1}
- local skip = 1
- for i=1,29,2 do
- for j=i,i+1 do t[j] = t[j-1] + skip end
- if i ~= 1 then skip = skip * 2 end
- end
- tdecode_dist_base = t
- --for i=0,29 do debug('T3',i,t[i]) end
- end
- if not tdecode_dist_nextrabits then
- local t = {}
- if NATIVE_BITOPS then
- for i=0,29 do
- local j = math_max(i - 2, 0)
- t[i] = rshift(j, 1)
- end
- else
- for i=0,29 do
- local j = math_max(i - 2, 0)
- t[i] = (j - (j % 2)) / 2
- end
- end
- tdecode_dist_nextrabits = t
- --for i=0,29 do debug('T4',i,t[i]) end
- end
- local dist_val = disttable:read(bs)
- local dist_base = tdecode_dist_base[dist_val]
- local dist_nextrabits = tdecode_dist_nextrabits[dist_val]
- local dist_extrabits = bs:read(dist_nextrabits)
- local dist = dist_base + dist_extrabits
- --debug('BACK', len, dist)
- for i=1,len do
- local pos = (outstate.window_pos - 1 - dist) % 32768 + 1 -- 32K
- output(outstate, assert(outstate.window[pos], 'invalid distance'))
- end
- end
- return false
- end
- local function parse_block(bs, outstate)
- local bfinal = bs:read(1)
- local btype = bs:read(2)
- local BTYPE_NO_COMPRESSION = 0
- local BTYPE_FIXED_HUFFMAN = 1
- local BTYPE_DYNAMIC_HUFFMAN = 2
- local BTYPE_RESERVED_ = 3
- if DEBUG then
- debug('bfinal=', bfinal)
- debug('btype=', btype)
- end
- if btype == BTYPE_NO_COMPRESSION then
- bs:read(bs:nbits_left_in_byte())
- local len = bs:read(16)
- local nlen_ = noeof(bs:read(16))
- for i=1,len do
- local by = noeof(bs:read(8))
- output(outstate, by)
- end
- elseif btype == BTYPE_FIXED_HUFFMAN or btype == BTYPE_DYNAMIC_HUFFMAN then
- local littable, disttable
- if btype == BTYPE_DYNAMIC_HUFFMAN then
- littable, disttable = parse_huffmantables(bs)
- else
- littable = HuffmanTable {0,8, 144,9, 256,7, 280,8, 288,nil}
- disttable = HuffmanTable {0,5, 32,nil}
- end
- repeat
- local is_done = parse_compressed_item(
- bs, outstate, littable, disttable)
- until is_done
- else
- runtime_error 'unrecognized compression type'
- end
- return bfinal ~= 0
- end
- function M.inflate(t)
- local bs = get_bitstream(t.input)
- local outbs = get_obytestream(t.output)
- local outstate = make_outstate(outbs)
- repeat
- local is_final = parse_block(bs, outstate)
- until is_final
- end
- local inflate = M.inflate
- function M.gunzip(t)
- local bs = get_bitstream(t.input)
- local outbs = get_obytestream(t.output)
- local disable_crc = t.disable_crc
- if disable_crc == nil then disable_crc = false end
- parse_gzip_header(bs)
- local data_crc32 = 0
- inflate{input=bs, output=
- disable_crc and outbs or
- function(byte)
- data_crc32 = crc32(byte, data_crc32)
- outbs(byte)
- end
- }
- bs:read(bs:nbits_left_in_byte())
- local expected_crc32 = bs:read(32)
- local isize = bs:read(32) -- ignored
- if DEBUG then
- debug('crc32=', expected_crc32)
- debug('isize=', isize)
- end
- if not disable_crc and data_crc32 then
- if data_crc32 ~= expected_crc32 then
- runtime_error('invalid compressed data--crc error')
- end
- end
- if bs:read() then
- warn 'trailing garbage ignored'
- end
- end
- function M.adler32(byte, crc)
- local s1 = crc % 65536
- local s2 = (crc - s1) / 65536
- s1 = (s1 + byte) % 65521
- s2 = (s2 + s1) % 65521
- return s2*65536 + s1
- end -- 65521 is the largest prime smaller than 2^16
- function M.inflate_zlib(t)
- local bs = get_bitstream(t.input)
- local outbs = get_obytestream(t.output)
- local disable_crc = t.disable_crc
- if disable_crc == nil then disable_crc = false end
- local window_size_ = parse_zlib_header(bs)
- local data_adler32 = 1
- inflate{input=bs, output=
- disable_crc and outbs or
- function(byte)
- data_adler32 = M.adler32(byte, data_adler32)
- outbs(byte)
- end
- }
- bs:read(bs:nbits_left_in_byte())
- local b3 = bs:read(8)
- local b2 = bs:read(8)
- local b1 = bs:read(8)
- local b0 = bs:read(8)
- local expected_adler32 = ((b3*256 + b2)*256 + b1)*256 + b0
- if DEBUG then
- debug('alder32=', expected_adler32)
- end
- if not disable_crc then
- if data_adler32 ~= expected_adler32 then
- runtime_error('invalid compressed data--crc error')
- end
- end
- if bs:read() then
- warn 'trailing garbage ignored'
- end
- end
- return M
Add Comment
Please, Sign In to add comment