# srolangtool

import msvcrt as m
from enum import Enum
import re, codecs, datetime, sys, math

# Number of language columns expected on a full line of the source text file.
# The parser compares "column count minus two" against this value to detect a
# surplus empty column at the end of a line.
LANG_COLUMN_COUNT = 14
# Name of the source text file to process, without the ".txt" extension.
# Possible options: textquest_queststring, texthelp, textquest_otherstring,
# textuisystem, textquest_speech&name, textdata_object
SOURCE_SRO_TEXT_FILE = "textuisystem"
# Progress-bar state shared with the U helpers:
# line/entry index at which the bar was last redrawn (0 = not drawn yet).
_PROGRESS = 0
# Total number of lines in the source text file; set once the file is opened.
_FILELENGTH = 0
# Minimum number of lines/entries that must pass before the bar is redrawn.
_POPT = 0
# Paths presumably meant for the Poedit base-path / search-path header fields
# of the generated .po files -- verify against the exporter.
_POEDIT_BASE_PATH = "C:\\"
_POEDIT_SEARCHPATH = "C:\\"

# progressbar
class U:
    """Namespace for the console progress-bar helpers.

    The functions are called directly on the class (``U.prg(...)``); the class
    is never instantiated.
    """

    # Number of character cells between the brackets of the bar.
    _WIDTH = 20

    @staticmethod
    def _render(prog):
        """Return the text that ``prg`` writes for the percentage *prog*.

        *prog* is truncated to an int.  Negative values are shown as 1% and
        followed by "Halted!"; values of 100 or more are shown as 100% and
        followed by "Done!".  The bar is always exactly ``_WIDTH`` cells wide.
        """
        prog = int(prog)
        suffix = ""
        if prog < 0:
            prog, suffix = 1, "\nHalted!"
        elif prog >= 100:
            prog, suffix = 100, "\nDone!"
        # Integer ceiling of prog * _WIDTH / 100, so any progress above 0%
        # fills at least one cell and the two halves always add up to _WIDTH.
        filled = (prog * U._WIDTH + 99) // 100
        bar = "=" * filled + "-" * (U._WIDTH - filled)
        return "\r[" + bar + "]  " + str(prog) + "%" + suffix

    @staticmethod
    def prg(prog):
        """Redraw the progress bar on the current console line for *prog* percent."""
        sys.stdout.write(U._render(prog))
        sys.stdout.flush()

    @staticmethod
    def pq(lnid):
        """Report progress for line *lnid* relative to the global _FILELENGTH."""
        U.pqq(lnid, _FILELENGTH)

    @staticmethod
    def pqq(lnid, length):
        """Report progress for item *lnid* out of *length* items.

        The bar is redrawn on the first call after _PROGRESS was reset to 0 and
        afterwards only when more than _POPT items have passed since the last
        redraw.  Nothing is drawn when *length* is not positive, because no
        percentage can be computed.
        """
        global _PROGRESS
        global _POPT

        if length <= 0:
            return
        if _PROGRESS == 0 or (lnid - _POPT) > _PROGRESS:
            _PROGRESS = lnid
            U.prg(_PROGRESS / (length * 0.01) + 1)


class Language(Enum):
    """Language slots of the source text file, numbered in column order.

    The parser converts a column index into a member with ``Language(n)``, so
    the numeric values are significant and must stay contiguous.
    """
    Korean = 0
    Lang_1 = 1
    Lang_2 = 2
    Lang_Chinese = 3
    Lang_Taiwan = 4
    Lang_Japan = 5
    Lang_English = 6
    Lang_Hungarian = 7 # named Hungarian here; the column is basically Vietnam
    Lang_Thailand = 8
    Lang_Russian = 9
    Lang_Turkey = 10
    Lang_Spanish = 11
    Lang_Arabian = 12
    Lang_German = 13
    Lang_Error = 14 # bucket for data that could not be mapped to a language column

class Catalog:
    """A single text entry: one identifier in one language.

    Attributes:
        lang: language slot the text belongs to.
        Id: string identifier of the entry.
        Text: the text itself.
        Line: index of the record the entry was created from.
        bMultiLined: False on creation; set by the parser when continuation
            text is appended to the entry.
    """

    def __init__(self, _lang, _id, _text, lnid):
        self.lang, self.Id, self.Text, self.Line = _lang, _id, _text, lnid
        self.bMultiLined = False

class CatalogCollection(object):
    """Dict-like container mapping every Language to its list of Catalog entries.

    Besides the mapping protocol it knows how to export its content as a raw
    text dump, a PHP gettext template and one .po file per language.
    """

    # Mutable and compared by content, therefore deliberately unhashable.
    __hash__ = None

    def __init__(self):
        # The insertion order below is the order in which the languages are
        # exported, so it is kept as an explicit literal.
        self.data = {
                Language.Korean : [],
                Language.Lang_1 : [],
                Language.Lang_2 : [],
                Language.Lang_Arabian : [],
                Language.Lang_Chinese : [],
                Language.Lang_English : [],
                Language.Lang_German : [],
                Language.Lang_Hungarian : [],
                Language.Lang_Japan : [],
                Language.Lang_Russian : [],
                Language.Lang_Spanish : [],
                Language.Lang_Taiwan : [],
                Language.Lang_Thailand : [],
                Language.Lang_Turkey : [],
                Language.Lang_Error : []
        }
        # True when the last text written by __splitPoLines spanned several lines.
        self._bSplittedLine = False
        # True when the English source of the entry being exported is multi-lined.
        self._bMultiLineCat = False

    def addEntry(self, cat):
        """Append *cat* to the list of its language (KeyError for an unknown language)."""
        self.data[cat.lang].append(cat)

    def editEntry(self, cat):
        """Replace the stored entry equal to *cat* with *cat*.

        Prints a diagnostic instead of raising when no such entry exists.
        """
        entries = self.data[cat.lang]
        try:
            # list.index raises ValueError for a missing element; it never
            # returns -1.
            idx = entries.index(cat)
        except ValueError:
            print("CatalogCollection::editEntry: given element not found.")
            return
        entries[idx] = cat

    def __repr__(self): return repr(self.data)

    def __getitem__(self, key):
        if key in self.data:
            return self.data[key]
        # Same hook as dict subclasses: a subclass may define __missing__.
        if hasattr(self.__class__, "__missing__"):
            return self.__class__.__missing__(self, key)
        raise KeyError(key)

    def __eq__(self, other):
        """Compare by content with another collection or with a plain mapping."""
        if isinstance(other, CatalogCollection):
            return self.data == other.data
        return self.data == other

    def __cmp__(self, dict):
        """Legacy three-way comparison; Python 3 itself never calls it.

        Returns 0 when both sides are equal.  Mappings have no ordering in
        Python 3, so unequal operands raise TypeError.
        """
        if self == dict:
            return 0
        raise TypeError("CatalogCollection supports only equality comparison")

    def __setitem__(self, key, item): self.data[key] = item
    def __delitem__(self, key): del self.data[key]
    def __iter__(self): return iter(self.data)

    def clear(self): self.data.clear()
    def keys(self): return self.data.keys()
    def items(self): return self.data.items()
    # The iter* variants are kept for callers written against the Python 2
    # mapping API; dict no longer provides them, so they wrap the views.
    def iteritems(self): return iter(self.data.items())
    def iterkeys(self): return iter(self.data.keys())
    def itervalues(self): return iter(self.data.values())
    def values(self): return self.data.values()
    def has_key(self, key): return key in self.data

    def update(self, dict=None, **kwargs):
        """Merge *dict* (a collection, mapping or iterable of pairs) and *kwargs*."""
        source = dict
        if source is None:
            pass
        elif isinstance(source, CatalogCollection):
            self.data.update(source.data)
        elif isinstance(source, type({})) or not hasattr(source, 'items'):
            self.data.update(source)
        else:
            for k, v in source.items():
                self[k] = v
        if len(kwargs):
            self.data.update(kwargs)

    def __len__(self):
        return len(self.data)

    @staticmethod
    def _poEscape(text):
        """Escape backslashes and double quotes for use inside a PO string."""
        return text.replace("\\", "\\\\").replace("\"", "\\\"")

    def __splitPoLines(self, text, handle, mode="id"):
        """Write *text* to *handle* as a ``msgid`` or ``msgstr`` entry.

        *mode* is the keyword suffix ("id" or "str").  Text containing line
        breaks is written in the multi-line PO form, one quoted string per
        line, each terminated by an escaped newline; _bSplittedLine records
        which form was used.
        """
        text = self._poEscape(text)
        # newer versions of silkroad use UNIX line endings in mid-column sections,
        # so both \r\n and \n count as a line break.
        lines = re.split(r"\r\n|\n", text)
        if len(lines) > 1:
            self._bSplittedLine = True
            handle.write("msg%s \"\"\n" % mode)
            handle.write("\n".join("\"%s\\n\"" % line for line in lines))
            handle.write("\n")
        else:
            self._bSplittedLine = False
            handle.write("msg%s \"%s\"\n" % (mode, text + ("\\n" if self._bMultiLineCat else "")))

    def exportPoFile(self):
        """Write one ``<source>_<language>.po`` file per language.

        Every msgid consists of the entry id followed by the English source
        text; the msgstr is the text of the exported language.
        """
        global _PROGRESS
        # Index the English entries once instead of scanning the list for every
        # exported entry; setdefault keeps the first entry per id, which is the
        # one a linear search would find.
        englishById = {}
        for entry in self.data[Language.Lang_English]:
            englishById.setdefault(entry.Id, entry)

        for lang, val in self.data.items():
            poName = "%s_%s.po" % (SOURCE_SRO_TEXT_FILE, lang.name)
            print("Current file:  " + poName)
            U.prg(0)
            with codecs.open(poName, "w", encoding='utf-8') as fh:
                stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M-0000')
                fh.write("msgid \"\"\nmsgstr \"\"\n")
                fh.write("\"Project-Id-Version: Project1\\n\"\n")
                fh.write("\"POT-Creation-Date: %s\\n\"\n" % stamp)
                fh.write("\"PO-Revision-Date: %s\\n\"\n" % stamp)
                fh.write("\"Last-Translator: Translator1 <xxxx@xxxx.com>\\n\"\n")
                fh.write("\"Language-Team: xxxx\\n\"\n")
                fh.write("\"Language: xxx\\n\"\n")
                fh.write("\"MIME-Version: 1.0\\n\"\n")
                fh.write("\"Content-Type: text/plain; charset=UTF-8\\n\"\n")
                fh.write("\"Content-Transfer-Encoding: 8bit\\n\"\n")
                fh.write("\"X-Generator: SROPyTools - generateCatalog.py v1.0\\n\"\n")
                fh.write("\"X-Poedit-KeywordsList: _;gettext;gettext_noop\\n\"\n")
                # The paths are escaped: a raw trailing backslash would
                # otherwise swallow the "\n" escape that follows it.
                fh.write("\"X-Poedit-Basepath: %s\\n\"\n" % self._poEscape(_POEDIT_BASE_PATH))
                fh.write("\"X-Poedit-SourceCharset: UTF-8\\n\"\n")
                fh.write("\"X-Poedit-SearchPath-0: %s\\n\"\n" % self._poEscape(_POEDIT_SEARCHPATH))
                fh.write("\n\n")

                # we go through the entries of the current language
                # and we write them inside the language's .po file
                _PROGRESS = 0
                for count, i in enumerate(val, 1):
                    fh.write("#: ./%s.txt:%s\n" % (SOURCE_SRO_TEXT_FILE, i.Line))

                    source = englishById.get(i.Id)
                    if source is not None:
                        engText = source.Text
                        self._bMultiLineCat = source.bMultiLined
                    else:
                        # _bMultiLineCat intentionally keeps its previous value here.
                        engText = "--- Generator Exception: English source text not found. ---"
                        print("\nEnglish source not found. Id: " + i.Id)

                    self.__splitPoLines("%s --- source:[ %s ]" % (i.Id, engText), fh, "id")

                    if i.Text != "0" and i.Text != "":
                        self.__splitPoLines(i.Text, fh, "str")
                    else:
                        # Placeholder/empty translation: only mirror the trailing
                        # newline of the msgid written just above.
                        fh.write("msgstr \"%s\"\n" % (i.Text + ("\\n" if self._bMultiLineCat or self._bSplittedLine else "")))
                    fh.write("\n")
                    U.pqq(count, len(val))
                U.prg(100)
                print("\n")

    def exportRawFile(self):
        """Dump every language section as ``id - text`` lines into ``<source>_parsed.txt``."""
        with codecs.open(SOURCE_SRO_TEXT_FILE + "_parsed.txt", "w", encoding='utf-16le') as fh:
            for lang, val in self.data.items():
                fh.write("%s -------- \r\n" % lang.name)
                for x in val:
                    fh.write("%s - %s \r\n" % (x.Id, x.Text))
                fh.write("\r\n\r\n")

    def generateCatalogTemplate(self):
        """Write ``<source>_catalog.php`` with one ``gettext(id);`` line per entry.

        Only the ids of the first stored language are used.
        """
        with codecs.open(SOURCE_SRO_TEXT_FILE + "_catalog.php", "w", encoding='utf-16le') as fh:
            fh.write("<?php \r\n")
            for x in next(iter(self.data.values()), []):
                fh.write("gettext(%s);\r\n" % x.Id)
            fh.write("\r\n\r\n?>")
def wait():
    """Pause until a key is pressed in the console window."""
    prompt = "Press any key to continue..."
    print(prompt)
    m.getch()

def __criticalLangSection(nColId, szColId, szCol, lineidx):
    """Build the Catalog entry for one language column of a record line.

    nColId   -- index of the column within the line; the language columns
                start at index 2, so ``nColId - 2`` is the Language value.
    szColId  -- identifier of the record.
    szCol    -- text of the column.
    lineidx  -- record index stored on the entry.

    A column index that maps to no Language value (a malformed line with too
    many columns) yields an entry filed under Language.Lang_Error.
    """
    try:
        lang = Language(nColId - 2)
    except ValueError:
        # Enum lookup by value raises ValueError for an unknown value.
        lang = Language.Lang_Error
    return Catalog(lang, szColId, szCol, lineidx)

# Pattern used to recognise a record line: one or more upper-case/digit groups
# each followed by an underscore, optionally ending in one more such group.
expr = r"(([A-Z0-9]+_)+([A-Z0-9]+)?)"
prog = re.compile(expr)

# Parser state shared by the main loop.
precatalog = []
poentries = CatalogCollection()      # all parsed entries, grouped by language
lineidx = -1                         # index of the record currently being parsed
currentLang = Language.Korean        # language slot that continuation text belongs to
currentId = ""                       # identifier of the most recent record

# Parse the source text file into poentries, then run the three exporters.
with codecs.open("%s.txt" % (SOURCE_SRO_TEXT_FILE), encoding='utf-16le') as f:

    print("Parsing %s.txt ..." % SOURCE_SRO_TEXT_FILE)
    rlnid = 1
    # Count the lines first so the progress bar can show a percentage.
    # (An empty file simply yields 0 instead of leaving a loop variable unbound.)
    _FILELENGTH = sum(1 for _ in f)

    # Redraw the progress bar less often for longer files.
    if _FILELENGTH >= 10000:
        _POPT = 500
    elif _FILELENGTH >= 1000:
        _POPT = 200
    elif _FILELENGTH >= 500:
        _POPT = 20
    else:
        _POPT = 5

    f.seek(0)

    U.prg(0)

    for line in f:
        # The utf-16le codec does not strip a byte order mark; without this a
        # leading "//" comment line would not be recognised.
        if line.startswith("\ufeff"):
            line = line[1:]
        # Skip empty lines and comment lines.
        if not line or line == "\r\n" or line[0:2] == "//":
            continue

        # A leading tab means the text was broken right before a column
        # separator: drop the tab and move on to the next language.
        if line[0] == '\t':
            line = line[1:]
            try:
                currentLang = Language(currentLang.value + 1)
            except ValueError:
                # Already at the last slot (Lang_Error): keep collecting there.
                pass

        ma = prog.search(line)
        columns = line.split("\t")
        collen = len(columns)
        cId = -1

        # A record line carries its identifier in the second column; a line
        # without any tab cannot be a record even if its text matches the id
        # pattern, so it is treated as continuation text.
        if ma and collen > 1:
            cId = columns[1]
            currentId = cId
            lineidx += 1

        tidx = 0
        for col in columns:
            if col == "" or col == "\r\n":
                col = "0"
                # sometimes there's an empty useless column at the end of the full lines
                # (note: skipping it also leaves tidx untouched)
                if (collen - 2) > LANG_COLUMN_COUNT:
                    continue
            if cId != -1:
                if tidx >= 2:
                    # if there are malformed lines in the text file then the
                    # helper files their data under a separate error section
                    ec = __criticalLangSection(tidx, cId, col, lineidx + 1)
                    currentLang = ec.lang
                    poentries.addEntry(ec)

            else: # the line is a continuation of a record broken across lines
                t = poentries[currentLang]
                ithasidx = False

                # Look for the entry of the current record in the current language.
                for ikx, val in enumerate(t):
                    if ikx == lineidx:
                        ithasidx = True
                        break
                    elif val.Id == currentId and val.lang == currentLang:
                        ithasidx = True
                        lineidx = ikx
                        break

                if ithasidx:
                    poentries[currentLang][lineidx].Text += col
                    poentries[currentLang][lineidx].bMultiLined = True
                else:
                    poentries[currentLang].insert(lineidx, Catalog(currentLang, currentId, col, lineidx + 1))

                # Every column but the last one of the line is followed by a
                # separator, i.e. the next column belongs to the next language.
                if collen != 1 and tidx < (collen - 1):
                    try:
                        currentLang = Language(currentLang.value + 1)
                    except ValueError:
                        # No language after the last slot; stay where we are.
                        pass

            tidx += 1
        rlnid += 1
        U.pq(rlnid)
    U.prg(100)
    _POPT = 25
    print("\n\n")
    print("parsing done. exporting raw...")
    poentries.exportRawFile()
    print("generating catalog...")
    poentries.generateCatalogTemplate()
    print("generating .po files...")
    poentries.exportPoFile()

print("Malformed section count in text file: %s" % (len(poentries[Language.Lang_Error])))
print("done")
wait()