Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /* My tiny gzip decompressor without using zlib. - Joel Yliluoma
- * http://iki.fi/bisqwit/ , http://youtube.com/user/Bisqwit
- * Inspired and influenced by a 13th IOCCC winner program by Ron McFarland */
- /* Fun fact: Contains zero new/delete, and no STL data structures */
- #include <assert.h>
- // This is the public method declared (later) in this file.
- template<typename InputFunctor, typename OutputFunctor>
- void Deflate(InputFunctor&& input, OutputFunctor&& output);
- // The rest of the file is just for the curious about implementation.
- namespace gunzip_ns
- {
- template<typename N, typename U=unsigned short>
- struct PoolType
- {
- N* const ptr; // Pointer to the beginning of storage for nodes
- U used; // How many nodes have been allocated
- void clear() { used=0; }
- N* get(U n) const { return n ? &ptr[n-1] : nullptr; }
- N* allocate(U& n) { return n ? get(n) : get(n = ++used)->Reset(); }
- };
- struct huffnode
- {
- unsigned short branch[2], value,_;
- huffnode* Reset() { branch[0]=0; branch[1]=0; return this; }
- // Create a huffman tree for num_values, with given lengths.
- // The tree will be put in branch[which]; other branch not touched.
- // Length = how many bits to use for encoding this value.
- void Create(unsigned which, unsigned num_values,
- const unsigned char lengths[], PoolType<huffnode>& pool)
- {
- branch[which] = 0;
- unsigned count[16] { 0 }, offs[16] { 0 };
- for(unsigned a = 0; a < num_values; ++a) count[lengths[a]] += 1;
- for(unsigned a = 0; a < 15; a++) offs[a + 1] = (offs[a] + count[a]) * 2u;
- // Largest values seen in wild for offs[16]: 16777216
- for(unsigned value = 0; value < num_values; ++value)
- if(unsigned length = lengths[value])
- {
- huffnode* node = pool.allocate(branch[which]);
- for(unsigned q = offs[lengths[value]]++; length--; )
- node = pool.allocate(node->branch[ (q >> length) & 1 ]);
- node->value = value;
- }
- }
- };
- // Length codes: base + number of bits (0-28)
- // Distance codes: base + number of bits (0-29)
- // Order of lengths for dynamic block decoding (0-18)
- constexpr unsigned short
- dbase[30] { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,385,513,769,1025,1537,2049,3073,4097,6145,8193 };
- constexpr unsigned char
- lbase[29] { 0,1,2,3,4,5,6,7,8,10,12,14,16,20,24,28,32,40,48,56,64,80,96,112,128,160,192,224,255 }, // +3
- lbits[29] { 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0 },
- dbits[30] { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13 },
- order[19] { 16,17,18,0, 8,7,9,6, 10,5,11,4, 12,3,13,2, 14,1,15 },
- fix[15] { 7,32,4, 8,35,2, 8,0,20, 9,18,14, 5,36,4 };
- }
- template<typename InputFunctor, typename OutputFunctor>
- void Deflate(InputFunctor&& input, OutputFunctor&& output)
- {
- using namespace gunzip_ns;
- // Bit-by-bit input routine
- struct { unsigned long Cache; unsigned Count,_; } Bits {0,0,0};
- auto GetBits = [&Bits,&input](unsigned l)
- {
- for (; Bits.Count < l; Bits.Count += 8)
- Bits.Cache |= (input() & 0xFFul) << Bits.Count;
- unsigned w = Bits.Cache & ((1ul << l) - 1);
- Bits.Cache >>= l;
- Bits.Count -= l;
- return w;
- };
- // Read stream header
- unsigned header = GetBits(16), winsize = 32768u;
- // ^ Read deflate header: method[4] ; winsize[4] ; checksum[8]
- if(header == 0x8B1F) // Is it actually a gzip header?
- {
- unsigned fmt = GetBits(8), o = GetBits(8); // Get format & flags
- assert(fmt == 8);
- GetBits(48); // MTIME (3 of 4); MTIME(1 of 4), XFL and OS
- if(o&4) // Skip extra fields as indicated by FEXTRA
- for(unsigned q = GetBits(16); q--; GetBits(8));
- if(o&8) while(GetBits(8)) {} // NAME: Skip filename if FNAME was present
- if(o&16) while(GetBits(8)) {} // COMMENT: Skip comment if FCOMMENT was present
- if(o&2) GetBits(16); // HCRC: Skip FCRC if was present
- }
- else // No. Deflate header?
- {
- winsize = 256 << ((header >> 4) & 0xF);
- assert((header & 0x200F) == 8 && !((((header<<8)+(header>>8))&0xFFFF)%31) && winsize <= 32768u);
- }
- // Output routine. Also updates the window for backward references.
- unsigned char WindowData[32768u/*winsize*/], Lengths[288+32]; // Lengths are in 0..15 range
- struct { unsigned Head,SizeMask; unsigned char* const Data; } Window {0,winsize-1, WindowData};
- auto Put = [&Window,&output](unsigned char l) { output(Window.Data[Window.Head++ & Window.SizeMask] = l); };
- // Function for reading a huffman-encoded value from bitstream.
- constexpr unsigned pool1_reserved=638, pool2_reserved=642;
- huffnode tables_fixed={{0,0},0,0}, tables_dyn, pool[pool1_reserved+pool2_reserved], *cur_table;
- PoolType<huffnode> pool1{&pool[pool2_reserved],0}, pool2{&pool[0],0}, *cur_pool;
- auto Read = [&GetBits](const huffnode& root, unsigned which, const PoolType<huffnode>& pool)
- {
- const auto* node = pool.get(root.branch[which]);
- while(node->branch[1]) node = pool.get(node->branch[GetBits(1)]);
- return node->value;
- };
- // Read compressed blocks
- for(bool last=false; !last; )
- {
- switch(last=GetBits(1), header=GetBits(2))
- {
- case 0: // copy stored block data
- GetBits(Bits.Count%8); // Go to byte boundary (discard a few bits)
- {unsigned a = GetBits(16), b = GetBits(16);
- assert(a == (b^0xFFFF));
- // Note: It is valid for "a" to be 0 here.
- // It is sometimes used for aligning the stream to byte boundary.
- while(a--) Put(GetBits(8)); }
- continue;
- case 1: // fixed block
- if(!tables_fixed.branch[0])
- {
- // Create fixed tables. Technically, this table could be generated
- // at compile-time, but I don't see an easy way to do it.
- for(auto n=0u; n<sizeof fix; )
- for(unsigned what=fix[n++], p=8*fix[n++], end=p+8*fix[n++]; p<end; ++p)
- Lengths[p] = what;
- // Note: fix[] intentionally contains overlaps. It is designed
- // such that clang generates rather optimal SSE assembly code.
- tables_fixed.Create(0, 288, Lengths, pool1); // 575 used here
- tables_fixed.Create(1, 32, Lengths+288, pool1); // 63 used here
- assert(pool1.used == pool1_reserved);
- // ^pool1 has always 638 elements. If this assertion fails,
- // something is wrong. Maybe coincidentally same as (288+32-1)*2.
- }
- cur_table = &tables_fixed; cur_pool = &pool1;
- break;
- default: // dynamic block
- assert(header==2);
- unsigned nlen = GetBits(5) + 257; // 257..288
- unsigned ndist = GetBits(5) + 1; // 1..32
- unsigned ncode = GetBits(4) + 4; // 4..19
- assert(nlen+ndist <= 288+32);
- for(unsigned a=0; a<19; ++a) Lengths[order[a]] = a<ncode ? GetBits(3) : 0;
- pool2.clear();
- tables_dyn.Create(0, 19, Lengths+0, pool2); // length-lengths
- assert(pool2.used <= pool2_reserved);
- for(unsigned end,lencode,prev_lencode=0, code=0; code < nlen + ndist; )
- {
- switch(lencode = Read(tables_dyn, 0, pool2))
- {
- case 16: end = code+3+GetBits(2); lencode = prev_lencode; break;
- case 17: end = code+3+GetBits(3); lencode = 0; break;
- case 18: end = code+11+GetBits(7); lencode = 0; break;
- default: end = code+1; prev_lencode = lencode; assert(lencode < 16);
- }
- assert(end <= nlen+ndist);
- while(code < end) Lengths[code++] = lencode;
- }
- pool2.clear();
- tables_dyn.Create(0, nlen, Lengths+0, pool2);
- tables_dyn.Create(1, ndist, Lengths+nlen, pool2);
- assert(pool2.used <= pool2_reserved);
- // ^If this assertion fails, simply increase pool2_reserved.
- // Try e.g. 2048-pool1_reserved.
- // So far the largest value seen in the wild is 630.
- //fprintf(stderr, "pool2 size%5u\n", pool2.used);
- cur_table = &tables_dyn; cur_pool = &pool2;
- }
- // Do actual deflating.
- for(;;)
- {
- auto code = Read(*cur_table, 0, *cur_pool);
- if(!(code & -256)) Put(code); // 0..255: literal byte
- else if(!(code & 0xFF)) break; // 256=end
- else // 257..285: backwards reference
- {
- unsigned length = (lbase-257)[code] + GetBits((lbits-257)[code]) + 3;
- code = Read(*cur_table, 1, *cur_pool); // Read distance code
- unsigned distance = dbase[code] + GetBits(dbits[code]);
- unsigned offs = Window.Head - distance;
- do Put(Window.Data[offs++ & Window.SizeMask]); while(--length);
- }
- }
- }
- // Note: after this, may come a checksum, and a trailer. Ignoring them.
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement