# Fixed LZ77 implementation

from abc import ABCMeta, abstractmethod
import time
import numpy as np
from cv2_41 import cv2

# ============================================================================

def save_image_size(filename, rows, cols, channels):
    """Write the image dimensions to a text file, one value per line.

    The values are written in the order rows, cols, channels; each is
    converted with ``str`` and followed by a newline.
    """
    dimensions = (rows, cols, channels)
    with open(filename, "w") as size_file:
        for value in dimensions:
            size_file.write(str(value) + '\n')

# ----------------------------------------------------------------------------

def load_image_size(filename):
    """Read three integers, one per line, from a text file.

    Returns them as a ``(row, col, ch)`` tuple in the order they appear in
    the file.
    """
    with open(filename, "r") as size_file:
        row_count, col_count, channel_count = [
            int(size_file.readline()) for _ in range(3)
        ]

    return (row_count, col_count, channel_count)

# ============================================================================

class LZ77Output(ABCMeta('_LZ77OutputBase', (object,), {})):
    """Abstract interface for objects that receive the encoder's symbols.

    The base class is created by calling ``ABCMeta`` directly so that the
    metaclass takes effect on both Python 2 and Python 3.  A
    ``__metaclass__`` class attribute is ignored by Python 3, which would
    leave ``@abstractmethod`` unenforced there.
    """

    @abstractmethod
    def write_symbol(self, match_info, char):
        """Record one output symbol.

        ``match_info`` is an ``(offset, length)`` pair and ``char`` is the
        literal value that follows the match.
        """

# ----------------------------------------------------------------------------

class LZ77OutputNPArray(LZ77Output):
    """Symbol sink that keeps its output in NumPy arrays as it goes.

    Match descriptors are stored flattened (offset, length, offset,
    length, ...) as ``uint16``; literals are stored as ``uint8``.

    Note that ``np.append`` allocates and fills a new array on every call,
    so each ``write_symbol`` copies everything written so far.
    """

    def __init__(self):
        self._tuples = np.array([], dtype=np.uint16)
        self._chars = np.array([], dtype=np.uint8)

    def write_symbol(self, match_info, char):
        """Append one ``(offset, length)`` pair and its trailing literal."""
        self._tuples = np.append(self._tuples, np.uint16(match_info))
        # Cast explicitly: appending a plain Python int would otherwise
        # promote the whole array to the default integer dtype, and the
        # saved file would no longer be uint8.
        self._chars = np.append(self._chars, np.uint8(char))

    def save(self, filename_tuples, filename_chars):
        """Store both arrays with ``np.save`` under the given file names."""
        np.save(filename_tuples, self._tuples)
        np.save(filename_chars, self._chars)

# ----------------------------------------------------------------------------

class LZ77OutputList(LZ77Output):
    """Symbol sink that collects output in Python lists.

    The lists are converted to NumPy arrays only when ``save`` is called:
    match descriptors as ``uint16`` (flattened offset/length pairs) and
    literals as ``uint8``.
    """

    def __init__(self):
        self._tuples, self._chars = [], []

    def write_symbol(self, match_info, char):
        """Append the two entries of ``match_info`` and the literal."""
        for part in (0, 1):
            self._tuples.append(match_info[part])
        self._chars.append(char)

    def save(self, filename_tuples, filename_chars):
        """Convert each list to an array and store it with ``np.save``."""
        targets = (
            (filename_tuples, self._tuples, np.uint16),
            (filename_chars, self._chars, np.uint8),
        )
        for path, values, dtype in targets:
            np.save(path, np.array(values, dtype=dtype))

# ============================================================================

class LZ77Input(ABCMeta('_LZ77InputBase', (object,), {})):
    """Abstract interface for objects that supply symbols to the decoder.

    The base class is created by calling ``ABCMeta`` directly so that the
    metaclass takes effect on both Python 2 and Python 3.  A
    ``__metaclass__`` class attribute is ignored by Python 3, which would
    leave ``@abstractmethod`` unenforced there.
    """

    @abstractmethod
    def read_symbol(self):
        """Return the next ``(match_info, char)`` pair from the stream."""

# ----------------------------------------------------------------------------

class LZ77InputNPArray(LZ77Input):
    """Symbol source backed by two arrays loaded with ``np.load``.

    The tuple array holds flattened (offset, length) pairs, two entries
    per symbol; the char array holds one literal per symbol.
    """

    def __init__(self, filename_tuples, filename_chars):
        self._tuples = np.load(filename_tuples)
        self._chars = np.load(filename_chars)
        self._pos = 0  # index of the next symbol to hand out

    def read_symbol(self):
        """Return the next ``(match_info, char)`` pair.

        ``match_info`` is an ``(offset, length)`` tuple of plain Python
        ints.  Once every literal has been consumed, ``(None, None)`` is
        returned.
        """
        if self._pos >= len(self._chars):
            return (None, None)
        base = self._pos * 2
        # Hand out Python ints rather than NumPy scalars: under NumPy's
        # NEP 50 promotion rules a Python int combined with a uint16 scalar
        # stays uint16, so position arithmetic in the caller would overflow
        # beyond 65535.
        match_info = (int(self._tuples[base]), int(self._tuples[base + 1]))
        char = self._chars[self._pos]
        self._pos += 1
        return (match_info, char)

# ============================================================================

# Fixed file names used by lz77Compress and lz77Decompressor.
FILENAME_IMAGE_SIZE = "imgSize.txt"  # image dimensions, one integer per line
FILENAME_COMPRESSED = "Compressed.txt"  # not used by any active code in this file
FILENAME_ENCODED_TUPLE = "encodedTuple.npy"  # flattened (offset, length) pairs
FILENAME_ENCODED_CHARS = "encodedChar.npy"  # literal stored with each pair

# ============================================================================

def get_match_length(input_buffer, search_pos, pos, look_ahead_length):
    """Count how many leading elements at ``search_pos`` equal those at ``pos``.

    At most ``look_ahead_length`` elements are compared.  The result is the
    index of the first mismatch, or ``look_ahead_length`` when every
    compared element is equal.
    """
    mismatches = (
        step
        for step in range(look_ahead_length)
        if input_buffer[search_pos + step] != input_buffer[pos + step]
    )
    return next(mismatches, look_ahead_length)

# ----------------------------------------------------------------------------

def find_longest_match(input_buffer, sliding_window_start, pos, look_ahead_end):
    """Find the longest run in the window that matches the look-ahead.

    Candidate start positions are tried from ``pos - 1`` down to
    ``sliding_window_start``.  Only a strictly longer run replaces the
    current best, so among equally long runs the one closest to ``pos``
    wins.

    Returns ``(offset, length)`` where ``offset`` is the distance back from
    ``pos``, or ``(0, 0)`` when no element matches.
    """
    limit = look_ahead_end - pos
    best_length = 0
    best_offset = 0

    candidate = pos - 1
    while candidate >= sliding_window_start:
        run = get_match_length(input_buffer, candidate, pos, limit)
        if run > best_length:
            best_length, best_offset = run, pos - candidate
            if run >= limit:
                # The whole look-ahead matched; nothing longer is possible
                break
        candidate -= 1

    # best_offset is still 0 whenever best_length is 0
    return (best_offset, best_length)

# ----------------------------------------------------------------------------

def lz77_compress_buffer(input_buffer, sliding_window_size, look_ahead_size, lz77_out):
    """Encode ``input_buffer`` as LZ77 symbols written to ``lz77_out``.

    Every symbol is an ``(offset, length)`` pair followed by one literal.
    Matches shorter than two elements are emitted as a bare literal with
    the pair ``(0, 0)``.  ``input_buffer`` must expose ``.size`` and
    integer indexing.  Returns ``lz77_out``.
    """
    total = input_buffer.size
    cursor = 0  # start of the look-ahead, one past the end of the window
    while cursor < total:
        window_start = max(0, cursor - sliding_window_size)
        window_end = min(total, cursor + look_ahead_size)
        offset, length = find_longest_match(input_buffer, window_start, cursor, window_end)

        if length >= 2:
            if cursor + length == total:
                # Shorten the match so one literal remains to be coded
                length -= 1
            token = (offset, length)
        else:
            # Too short to be worth a match: emit the literal alone
            length = 0
            token = (0, 0)

        literal = input_buffer[cursor + length]
        lz77_out.write_symbol(token, literal)
        cursor += length + 1

    return lz77_out

# ----------------------------------------------------------------------------

def lz77Compress(image, sliding_window_size, look_ahead_size, lz77_output_class=LZ77OutputList):
    """Compress the image file ``image`` and save the encoded streams.

    The image is read with ``cv2.imread``, flattened and encoded with
    ``lz77_compress_buffer``.  The image shape is written to
    ``FILENAME_IMAGE_SIZE`` and the encoded data to
    ``FILENAME_ENCODED_TUPLE`` / ``FILENAME_ENCODED_CHARS``.  The time
    spent encoding is printed.

    The bundled output classes store offsets and lengths as ``uint16``, so
    window or look-ahead sizes above 65535 cannot be represented by them.

    Raises:
        IOError: if the image cannot be read.
    """
    img = cv2.imread(image)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising
        raise IOError("Could not read image: %s" % image)
    input_buffer = img.flatten()

    lz77_out = lz77_output_class()

    t_start = time.time()
    lz77_compress_buffer(input_buffer, sliding_window_size, look_ahead_size, lz77_out)
    t_end = time.time()

    save_image_size(FILENAME_IMAGE_SIZE, *img.shape)
    lz77_out.save(FILENAME_ENCODED_TUPLE, FILENAME_ENCODED_CHARS)
    # lz77_out.save_debug(FILENAME_COMPRESSED)

    print("Compress: %s" % (t_end - t_start))

# ============================================================================

def copy_match(output_buffer, offset, length, pos):
    """Copy ``length`` elements from ``offset`` positions back to ``pos``.

    Elements are copied one at a time in ascending order, so a source range
    that overlaps the destination re-reads values written earlier in the
    same call (this is what makes run-length style matches work).
    """
    copied = 0
    while copied < length:
        output_buffer[pos + copied] = output_buffer[pos - offset + copied]
        copied += 1

# ----------------------------------------------------------------------------

def lz77_decompress_buffer(lz77_in, expected_size):
    """Decode symbols from ``lz77_in`` into a ``uint8`` array.

    Symbols are read until ``expected_size`` elements have been produced or
    the reader returns a falsy ``match_info``.  A symbol whose length is 0
    contributes only its literal; otherwise the match is copied first and
    the literal is placed after it.

    Returns the decoded array.  An assertion checks that exactly
    ``expected_size`` elements were produced.
    """
    output_buffer = np.empty((expected_size), np.uint8)

    pos = 0
    end_pos = output_buffer.size
    while pos < end_pos:
        match_info, symbol = lz77_in.read_symbol()
        if not match_info:
            break

        # Work with plain Python ints: if the reader hands out NumPy
        # uint16 scalars, NEP 50 promotion (NumPy 2) would keep ``pos``
        # arithmetic in uint16 and overflow beyond 65535.
        offset, length = int(match_info[0]), int(match_info[1])
        if length == 0:
            output_buffer[pos] = symbol
            pos += 1
        else:
            copy_match(output_buffer, offset, length, pos)
            output_buffer[pos + length] = symbol
            pos += length + 1

    assert pos == end_pos, "decoded %d elements, expected %d" % (pos, end_pos)

    return output_buffer

# ----------------------------------------------------------------------------

def lz77Decompressor(lz77_input_class=LZ77Input):
    """Decode the saved streams and write the image to ``output.png``.

    The image shape is read from ``FILENAME_IMAGE_SIZE`` and the encoded
    data from ``FILENAME_ENCODED_TUPLE`` / ``FILENAME_ENCODED_CHARS``.  The
    time spent decoding is printed.

    ``lz77_input_class`` is called with the two encoded file names to build
    the reader.  The default, the abstract ``LZ77Input``, selects
    ``LZ77InputNPArray``.
    """
    if lz77_input_class is LZ77Input:
        # The abstract base cannot be built from file names; fall back to
        # the concrete NumPy-backed reader.
        lz77_input_class = LZ77InputNPArray

    rows, cols, channels = load_image_size(FILENAME_IMAGE_SIZE)

    lz77_in = lz77_input_class(FILENAME_ENCODED_TUPLE, FILENAME_ENCODED_CHARS)

    t_start = time.time()
    output_buffer = lz77_decompress_buffer(lz77_in, rows * cols * channels)
    t_end = time.time()

    image = np.reshape(output_buffer, (rows, cols, channels))

    cv2.imwrite("output.png", image)

    print("Decompress: %s" % (t_end - t_start))

# ============================================================================