Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import codecs
import datetime
import math
import msvcrt as m
import re
import sys
from enum import Enum
# Number of per-language text columns expected on a full data line
# (the parser compares "column count - 2" against this value).
LANG_COLUMN_COUNT = 14

# Base name (without ".txt") of the source text file to parse.
# Possible options: textquest_queststring, texthelp, textquest_otherstring,
# textuisystem, textquest_speech&name, textdata_object
SOURCE_SRO_TEXT_FILE = "textuisystem"

# Progress-bar bookkeeping shared with U.pq / U.pqq.
_PROGRESS = 0    # line index at which the bar was last redrawn
_FILELENGTH = 0  # line count of the source file, set once it has been counted
_POPT = 0        # minimum line delta between two redraws of the bar

# Values for the Poedit-specific headers of the generated .po files.
_POEDIT_BASE_PATH = "C:\\"
_POEDIT_SEARCHPATH = "C:\\"
# progressbar
class U:
    """Console progress-bar helpers.

    The methods take no ``self`` and are called on the class itself
    (``U.prg(...)``), so they are declared as static methods.
    """

    _BAR_WIDTH = 20  # number of cells in the bar; one cell stands for 5 %

    @staticmethod
    def prg(prog):
        """Draw the progress bar for ``prog`` percent on stdout.

        * ``0 <= prog < 100`` redraws the bar in place.
        * ``prog >= 100`` is clamped to 100 and followed by "Done!".
        * ``prog < 0`` signals an abort: the bar shows 1 % followed by
          "Halted!".

        The bar is always exactly ``_BAR_WIDTH`` cells wide.
        """
        prog = int(prog)
        suffix = ""
        if prog >= 100:
            prog = 100
            suffix = "\nDone!"
        elif prog < 0:
            prog = 1
            suffix = "\nHalted!"
        # Round the filled part up and give the remainder to the empty part,
        # so both counts are integers and always add up to the bar width.
        filled = math.ceil(prog * U._BAR_WIDTH / 100)
        empty = U._BAR_WIDTH - filled
        sys.stdout.write("\r[" + "=" * filled + "-" * empty + "] " + str(prog) + "%" + suffix)
        sys.stdout.flush()

    @staticmethod
    def pq(lnid):
        """Report progress for line ``lnid`` out of ``_FILELENGTH`` lines."""
        U.pqq(lnid, _FILELENGTH)

    @staticmethod
    def pqq(lnid, length):
        """Report progress for item ``lnid`` out of ``length`` items.

        The bar is redrawn on the first call after ``_PROGRESS`` was reset
        to 0 and afterwards only when more than ``_POPT`` items have passed
        since the last redraw. Nothing is drawn for a non-positive
        ``length``.
        """
        global _PROGRESS
        if length <= 0:
            # Avoids a division by zero below; there is nothing to report.
            return
        if _PROGRESS == 0 or (lnid - _POPT) > _PROGRESS:
            _PROGRESS = lnid
            U.prg(_PROGRESS / (length * 0.01) + 1)
class Language(Enum):
    """Languages of the source text file.

    Each value is the zero-based index of the language's text column,
    i.e. the column index on a full line minus the two leading columns.
    """
    Korean = 0
    Lang_1 = 1
    Lang_2 = 2
    Lang_Chinese = 3
    Lang_Taiwan = 4
    Lang_Japan = 5
    Lang_English = 6
    Lang_Hungarian = 7  # basically Vietnam
    Lang_Thailand = 8
    Lang_Russian = 9
    Lang_Turkey = 10
    Lang_Spanish = 11
    Lang_Arabian = 12
    Lang_German = 13
    Lang_Error = 14  # not a real column: data that hit an error is filed under this language
class Catalog(object):
    """One text entry: the text of a single id in a single language."""

    def __init__(self, _lang, _id, _text, lnid):
        """Store the entry.

        _lang -- language the text belongs to
        _id   -- id of the text
        _text -- the text itself
        lnid  -- number written into the "#:" source reference of the .po file
        """
        self.lang = _lang
        self.Id = _id
        self.Text = _text
        self.Line = lnid
        # Set by the parser when the text was continued on a further line.
        self.bMultiLined = False

    def __repr__(self):
        return "Catalog(%r, %r, %r, %r)" % (self.lang, self.Id, self.Text, self.Line)
class CatalogCollection(object):
    """Dict-like container mapping each ``Language`` to its ``Catalog`` list.

    Besides the mapping protocol it can export the collected entries as a
    raw dump, as a PHP gettext template and as one .po file per language.
    """

    # Mutable container compared by content, hence unhashable.
    __hash__ = None

    def __init__(self):
        # One (initially empty) entry list per language. The order below is
        # the iteration order of the exports, so it is kept as it was.
        languages = (
            Language.Korean,
            Language.Lang_1,
            Language.Lang_2,
            Language.Lang_Arabian,
            Language.Lang_Chinese,
            Language.Lang_English,
            Language.Lang_German,
            Language.Lang_Hungarian,
            Language.Lang_Japan,
            Language.Lang_Russian,
            Language.Lang_Spanish,
            Language.Lang_Taiwan,
            Language.Lang_Thailand,
            Language.Lang_Turkey,
            Language.Lang_Error,
        )
        self.data = {lang: [] for lang in languages}
        # True when the last text written by __splitPoLines spanned several lines.
        self._bSplittedLine = False
        # True when the English source of the current entry is multi-lined.
        self._bMultiLineCat = False

    def addEntry(self, cat):
        """Append ``cat`` to the list of its language."""
        self.data[cat.lang].append(cat)

    def editEntry(self, cat):
        """Replace the stored entry equal to ``cat``; report it when absent."""
        entries = self.data[cat.lang]
        try:
            # list.index raises ValueError (it never returns -1) when missing.
            idx = entries.index(cat)
        except ValueError:
            print("CatalogCollection::editEntry: given element not found.")
        else:
            entries[idx] = cat

    def __repr__(self): return repr(self.data)

    def __getitem__(self, key):
        if key in self.data:
            return self.data[key]
        if hasattr(self.__class__, "__missing__"):
            return self.__class__.__missing__(self, key)
        raise KeyError(key)

    def __eq__(self, other):
        # Python 3 never calls __cmp__ (and cmp/UserDict do not exist here),
        # so content comparison is provided through __eq__ instead.
        if isinstance(other, CatalogCollection):
            return self.data == other.data
        return self.data == other

    def __setitem__(self, key, item): self.data[key] = item

    def __delitem__(self, key): del self.data[key]

    def __iter__(self): return iter(self.data)

    def clear(self): self.data.clear()

    def keys(self): return self.data.keys()

    def items(self): return self.data.items()

    # The iter* methods are kept for callers written against the Python 2
    # mapping API; dict itself no longer has them.
    def iteritems(self): return iter(self.data.items())

    def iterkeys(self): return iter(self.data.keys())

    def itervalues(self): return iter(self.data.values())

    def values(self): return self.data.values()

    def has_key(self, key): return key in self.data

    def update(self, dict=None, **kwargs):
        """Merge another collection, mapping or iterable of pairs into this one."""
        if dict is None:
            pass
        elif isinstance(dict, CatalogCollection):
            self.data.update(dict.data)
        elif isinstance(dict, type({})) or not hasattr(dict, 'items'):
            self.data.update(dict)
        else:
            for k, v in dict.items():
                self[k] = v
        if kwargs:
            self.data.update(kwargs)

    def __len__(self):
        return len(self.data)

    @staticmethod
    def _poEscape(text):
        """Escape backslashes and double quotes for use inside a PO string."""
        return text.replace("\\", "\\\\").replace("\"", "\\\"")

    def __splitPoLines(self, text, handle, mode="id"):
        """Write ``text`` to ``handle`` as a ``msgid``/``msgstr`` statement.

        mode -- "id" or "str", appended to "msg" to form the keyword.

        A text containing line breaks is written in the multi-line PO form,
        every part terminated by an escaped newline. A single-line text gets
        an escaped newline appended only when the current catalog entry is
        flagged as multi-lined. ``_bSplittedLine`` records which form was used.
        """
        text = self._poEscape(text)
        # newer versions of silkroad use UNIX Line endings in mid-column sections
        # also we strip the line ending \r\n from the text argument before we pass it.
        lines = re.split(r"\r\n|\n", text)
        if len(lines) > 1:
            self._bSplittedLine = True
            handle.write("msg%s \"\"\n" % mode)
            handle.write("\n".join("\"%s\\n\"" % line for line in lines))
            handle.write("\n")
        else:
            self._bSplittedLine = False
            handle.write("msg%s \"%s\"\n" % (mode, text + ("\\n" if self._bMultiLineCat else "")))

    def exportPoFile(self):
        """Write one "<source>_<language>.po" file per language.

        The msgid of every entry is "<id> --- source:[ <English text> ]";
        the msgstr is the entry's own text.
        """
        global _PROGRESS
        # First English entry per id, built once instead of scanning the
        # English list again for every entry of every language.
        english_by_id = {}
        for entry in self.data[Language.Lang_English]:
            english_by_id.setdefault(entry.Id, entry)

        for lang, val in self.data.items():
            po_name = "%s_%s.po" % (SOURCE_SRO_TEXT_FILE, lang.name)
            print("Current file: " + po_name)
            U.prg(0)
            with codecs.open(po_name, "w", encoding='utf-8') as fh:
                stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M-0000')
                header = (
                    "msgid \"\"\nmsgstr \"\"\n",
                    "\"Project-Id-Version: Project1\\n\"\n",
                    "\"POT-Creation-Date: %s\\n\"\n" % stamp,
                    "\"PO-Revision-Date: %s\\n\"\n" % stamp,
                    "\"Last-Translator: Translator1 <xxxx@xxxx.com>\\n\"\n",
                    "\"Language-Team: xxxx\\n\"\n",
                    "\"Language: xxx\\n\"\n",
                    "\"MIME-Version: 1.0\\n\"\n",
                    "\"Content-Type: text/plain; charset=UTF-8\\n\"\n",
                    "\"Content-Transfer-Encoding: 8bit\\n\"\n",
                    "\"X-Generator: SROPyTools - generateCatalog.py v1.0\\n\"\n",
                    "\"X-Poedit-KeywordsList: _;gettext;gettext_noop\\n\"\n",
                    # The paths contain backslashes; unescaped, a trailing one
                    # would swallow the "\n" that terminates the header line.
                    "\"X-Poedit-Basepath: %s\\n\"\n" % self._poEscape(_POEDIT_BASE_PATH),
                    "\"X-Poedit-SourceCharset: UTF-8\\n\"\n",
                    "\"X-Poedit-SearchPath-0: %s\\n\"\n" % self._poEscape(_POEDIT_SEARCHPATH),
                    "\n\n",
                )
                for header_line in header:
                    fh.write(header_line)

                # we go through the entries of the current language
                # and we write them inside the language's .po file
                _PROGRESS = 0
                total = len(val)
                for count, i in enumerate(val, 1):
                    fh.write("#: ./%s.txt:%s\n" % (SOURCE_SRO_TEXT_FILE, i.Line))
                    source = english_by_id.get(i.Id)
                    if source is not None:
                        engText = source.Text
                        self._bMultiLineCat = source.bMultiLined
                    else:
                        # NOTE(review): _bMultiLineCat keeps the value of the
                        # previous entry here, as before - confirm intended.
                        engText = "--- Generator Exception: English source text not found. ---"
                        print("\nEnglish source not found. Id: %s" % i.Id)
                    self.__splitPoLines("%s --- source:[ %s ]" % (i.Id, engText), fh, "id")
                    if i.Text != "0" and i.Text != "":
                        self.__splitPoLines(i.Text, fh, "str")
                    else:
                        # Placeholder text: end it with a newline whenever the msgid does.
                        fh.write("msgstr \"%s\"\n" % (i.Text + ("\\n" if self._bMultiLineCat or self._bSplittedLine else "")))
                    fh.write("\n")
                    U.pqq(count, total)
            U.prg(100)
            print("\n")

    def exportRawFile(self):
        """Dump every language's "<id> - <text>" pairs to "<source>_parsed.txt"."""
        with codecs.open(SOURCE_SRO_TEXT_FILE + "_parsed.txt", "w", encoding='utf-16le') as fh:
            for lang, val in self.data.items():
                fh.write("%s -------- \r\n" % lang.name)
                for x in val:
                    fh.write("%s - %s \r\n" % (x.Id, x.Text))
                fh.write("\r\n\r\n")

    def generateCatalogTemplate(self):
        """Write "<source>_catalog.php" with one gettext() call per id.

        Only the entries of the first language in ``self.data`` are used.
        """
        with codecs.open(SOURCE_SRO_TEXT_FILE + "_catalog.php", "w", encoding='utf-16le') as fh:
            fh.write("<?php \r\n")
            for val in self.data.values():
                for x in val:
                    # NOTE(review): the id is emitted unquoted - confirm the
                    # consumer of this template expects that.
                    fh.write("gettext(%s);\r\n" % x.Id)
                # NOTE(review): indentation was lost in the source; the break
                # is read as ending the loop after the first language.
                break
            fh.write("\r\n\r\n?>")
def wait():
    """Prompt and block until a key is pressed (via msvcrt's getch)."""
    print("Press any key to continue...")
    m.getch()
def __criticalLangSection(nColId, szColId, szCol, lineidx):
    """Build the Catalog entry for one column of an id line.

    nColId  -- zero-based column index on the line; the first two columns
               are not language columns, so the language is ``nColId - 2``
    szColId -- id of the text
    szCol   -- text of the column
    lineidx -- number stored as the entry's ``Line``

    A column index that maps to no language yields an entry filed under
    ``Language.Lang_Error`` instead of raising.
    """
    try:
        lang = Language(nColId - 2)
    except ValueError:
        # Enum lookup by value raises ValueError for an unknown value.
        lang = Language.Lang_Error
    return Catalog(lang, szColId, szCol, lineidx)
# Pattern of an id: groups of upper-case letters/digits joined by
# underscores, e.g. "ABC_DEF_1". A line containing a match is an id line.
expr = r"(([A-Z0-9]+_)+([A-Z0-9]+)?)"
prog = re.compile(expr)
precatalog = list()
poentries = CatalogCollection()
lineidx = -1
currentLang = Language(0)
currentId = ""


def _nextLanguage(lang):
    """Return the language following ``lang``, or ``lang`` itself at the end."""
    try:
        return Language(lang.value + 1)
    except ValueError:
        return lang


with codecs.open("%s.txt" % (SOURCE_SRO_TEXT_FILE), encoding='utf-16le') as f:
    print("Parsing %s.txt ..." % SOURCE_SRO_TEXT_FILE)
    rlnid = 1
    # get file length (0 for an empty file)
    _FILELENGTH = sum(1 for _ in f)
    # Redraw the progress bar less often the longer the file is.
    if _FILELENGTH >= 10000:
        _POPT = 500
    elif _FILELENGTH >= 1000:
        _POPT = 200
    elif _FILELENGTH >= 500:
        _POPT = 20
    else:
        _POPT = 5
    f.seek(0)
    U.prg(0)
    for line in f:
        # The utf-16le codec does not consume a byte order mark; drop it if
        # present so the checks below see the real start of the line.
        line = line.lstrip("\ufeff")
        if not line or line == "\r\n" or line[0:2] == "//":
            continue
        if line[0] == '\t':
            # A leading tab closes the column continued from the previous line.
            line = line[1:]
            currentLang = _nextLanguage(currentLang)
        ma = prog.search(line)
        columns = line.split("\t")
        collen = len(columns)
        cId = -1
        # The id is read from the second column, so a line without a tab
        # cannot be an id line even if its text happens to match the pattern.
        if ma and collen > 1:
            cId = columns[1]
            currentId = cId
            lineidx += 1
        tidx = 0
        for col in columns:
            if col == "" or col == "\r\n":
                col = "0"
                # sometimes there's an empty useless column at the end of the full lines
                # NOTE(review): indentation was lost in the source; this check
                # is read as applying to empty columns only - confirm.
                if (collen - 2) > LANG_COLUMN_COUNT:
                    continue
            if cId != -1:
                if tidx >= 2:
                    # if there are malformed lines in the text file then
                    # __criticalLangSection files that data under a
                    # separate section (Lang_Error)
                    ec = __criticalLangSection(tidx, cId, col, lineidx + 1)
                    currentLang = ec.lang
                    poentries.addEntry(ec)
            else:  # the line is a continuation of a text broken across lines
                t = poentries[currentLang]
                ithasidx = False
                for ikx, val in enumerate(t):
                    if ikx == lineidx:
                        ithasidx = True
                        break
                    elif val.Id == currentId and val.lang == currentLang:
                        ithasidx = True
                        lineidx = ikx
                        break
                if ithasidx:
                    poentries[currentLang][lineidx].Text += col
                    poentries[currentLang][lineidx].bMultiLined = True
                else:
                    poentries[currentLang].insert(lineidx, Catalog(currentLang, currentId, col, lineidx + 1))
                # Every column but the last is complete: move on to the next
                # language. NOTE(review): placement inside the continuation
                # branch is reconstructed from the lost indentation.
                if collen != 1 and tidx < (collen - 1):
                    currentLang = _nextLanguage(currentLang)
            tidx += 1
        rlnid += 1
        U.pq(rlnid)
    U.prg(100)
_POPT = 25
print("\n\n")
print("parsing done. exporting raw...")
poentries.exportRawFile()
print("generating catalog...")
poentries.generateCatalogTemplate()
print("generating .po files...")
poentries.exportPoFile()
print("Malformed section count in text file: %s" % (len(poentries[Language.Lang_Error])))
print("done")
wait()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement