# wesnoth/data/tools/pywmlx/state/machine.py

import re
# import os

from pywmlx.wmlerr import wmlerr
from pywmlx.wmlerr import wmlwarn
from pywmlx.wmlerr import warnall
from pywmlx.postring import PoCommentedString
from pywmlx.postring import PoCommentedStringPL
from pywmlx.state.state import State
from pywmlx.state.lua_states import setup_luastates
from pywmlx.state.wml_states import setup_wmlstates

import pywmlx.nodemanip
import pdb

class UniversalSet:
    """Set-like object that reports every value as a member.

    `x in UniversalSet()` is always True, so passing an instance to a
    filter is equivalent to not filtering.
    """

    def __contains__(self, any):
        # Membership never fails, whatever the candidate is.
        return True


# Universe - convenient singleton for which `x in Universe` is always True.
Universe = UniversalSet()

# --------------------------------------------------------------------
#  PART 1: machine.py global variables
# --------------------------------------------------------------------


# True if the --warnall option is used (stored by setup() from its `wall`
# argument)
_warnall = False
# True if the -D option is used; setup() turns it on whenever a debug
# output file is supplied
_debugmode = False
# debug output file (None when debug mode is off)
_fdebug = None
# dictionary of pot sentences
_dictionary = None
# dictionary containing lua and WML states (filled through addstate())
_states = None
# initialdomain value (set with the --initialdomain command line option)
_initialdomain = None
# the current domain value while parsing a file (changed by #textdomain text)
_currentdomain = None
# the domains to extract (set with the --domain command line option).
# setup() replaces this with set(domains) when domains are given; otherwise
# it stays Universe, which means "do not filter by domain".
_domains = Universe
# this boolean value will usually be:
#   True (when the file is a WML .cfg file)
#   False (when the file is a .lua file)
_waitwml = True
# this boolean value is used to avoid a possible bug
# that was observed in a special case
# (see WmlGoluaState in wml_states.py for more details)
_on_luatag = False

# ---------

# pending additional infos for translators, collected from "# po"
# or "# po-override" comments.
_pending_cinfo = {
    # pending additional infos for translators (# po: addedinfo)
    "po": None,
    # pending override wmlinfo for translators (# po-override: overrideinfo)
    "po-override": None,
}

# type of pending wmlinfo:
# it can be None or it can have an actual value.
# Possible actual values are: 'speaker', 'id', 'role', 'description',
#                             'condition', 'type', 'race' or 'gender'
_pending_winfotype = None

# ----------

# the last function name encountered in lua code (if any).
# If no lua function has been encountered yet, this var will be None
_pending_luafuncname = None

# ----------

# pending lua/wml strings (they will be evaluated and, if translatable,
# added to _dictionary)
_pending_luastring = None
_pending_wmlstring = None

# ----------

# line number counter for the file being parsed
_current_lineno = 0
# _linenosub helps to set the right orderid of the future PoCommentedString
# (it is incremented for each stored sentence and reset by run())
_linenosub = 0


# --------------------------------------------------------------------
#  PART 2: machine.py functions and classes
# --------------------------------------------------------------------


def clear_pending_infos(lineno, error=False):
    """Forget every pending "# po" / "# po-override" directive.

    lineno -- line number used in the error location (fileref:lineno).
    error  -- when true, each directive that is still pending is reported
              through wmlerr() before being discarded.
    """
    global _pending_cinfo
    for directive in _pending_cinfo:
        unapplied = _pending_cinfo[directive]
        if error and unapplied is not None:
            location = pywmlx.nodemanip.fileref + ":" + str(lineno)
            message = "#%s directive(s) not applied: %s" % (directive,
                                                            unapplied)
            wmlerr(location, message)
        # Reset the slot whether or not it was reported.
        _pending_cinfo[directive] = None


def after_pending_info(lineno, error):
    """Clear the pending translator directives once a string was handled.

    Thin wrapper around clear_pending_infos(); `error` tells whether
    directives that are still pending must be reported.
    """
    clear_pending_infos(lineno, error)


def checkdomain(lineno):
    """Tell whether the current textdomain is one of the wanted domains.

    Returns True when _currentdomain is in _domains. Otherwise the pending
    translator directives are discarded (and reported as not applied) and
    False is returned.
    """
    if _currentdomain not in _domains:
        clear_pending_infos(lineno, error=True)
        return False
    return True


def switchdomain(lineno, domain):
    """Make `domain` the current textdomain.

    Nothing happens when the domain does not change. On an actual change
    the pending translator directives are discarded (and reported as not
    applied) before the new domain is recorded.
    """
    global _currentdomain
    domain_changed = _currentdomain != domain
    if not domain_changed:
        return
    clear_pending_infos(lineno, error=True)
    _currentdomain = domain


def checksentence(mystring, finfo, *, islua=False):
    """Check a translatable sentence and warn about suspicious content.

    mystring -- the sentence to check.
    finfo    -- location string handed to wmlwarn().
    islua    -- True when the sentence comes from lua code; the WML macro
                check is skipped in that case.

    Return codes:
        0 -- nothing to report
        1 -- the sentence is empty (only whitespace); a warning is issued
        2 -- warnall() is on and the (non-lua) sentence contains "}",
             i.e. a WML macro; a warning is issued
    """
    if re.match(r'\s*$', mystring):
        wmlwarn(finfo, "found an empty translatable message")
        return 1
    if warnall() and not islua and "}" in mystring:
        wmsg = ("found a translatable string containing a WML macro. "
                " Translation for this string will NEVER work")
        wmlwarn(finfo, wmsg)
        return 2
    return 0


# When handling a PendingLuaString, if the string has a plural version then
# this class is used for the PendingLuaString.plural object.
class PendingPlural:
    """Collects the plural form that belongs to a PendingLuaString."""

    def __init__(self):
        self.string = ''
        # status values:
        #    'wait_string'    --> right after _ ( when we still need to know
        #                         which string type we will manage
        #    'wait_plural'    --> after the first argument. Search for the
        #                         plural or the closing parenthesis
        #    'wait_close'     --> expect the closing parenthesis
        self.status = 'wait_string'
        # pluraltype values, used for both single-line and multiline strings
        #    0: initial value, should have been changed if a string was found
        #    1: delimited by double-quotes
        #    2: delimited by single-quotes
        #    3: delimited by long brackets, self.numequals is the level of
        #       brackets
        self.pluraltype = 0
        self.numequals = 0
        self.ismultiline = False

    def addline(self, value, isfirstline=False):
        """Set (first line) or append (following lines) a line of text."""
        if self.pluraltype == 3:
            if isfirstline and value == "":
                # This should be handled by not adding (self.string + '\n')
                # on the next call, but someone can implement that if they
                # start using long-bracket strings.
                raise NotImplementedError("Not implemented: handling of long-bracket strings that start with a newline.")
            # Long-bracket text is taken literally: double every backslash.
            value = value.replace('\\', r'\\')
        self.string = value if isfirstline else self.string + '\n' + value

    def convert(self):
        """Escape and quote the collected text, then wrap it.

        self.string is rewritten in place; the return value is a
        PoCommentedStringPL built from it.
        """
        text = self.string
        kind = self.pluraltype
        if kind == 3:
            text = text.replace('"', r'\"')
        elif kind != 0:
            if kind == 2:
                # single-quoted source: \' becomes a plain '
                text = re.sub(r"\\\'", r"'", text)
            # escape every double quote that is not escaped yet
            text = re.sub(r'(?<!\\)"', r'\"', text)
        if self.ismultiline:
            # every line break becomes: \n" <newline> "
            lf = r'\\n"' + '\n"'
            text = re.sub(r'(\n\r|\r\n|[\n\r])', lf, text)
            text = '""\n"' + text + '"'
        else:
            text = '"' + text + '"'
        self.string = text
        return PoCommentedStringPL(self.string, ismultiline=self.ismultiline)


class PendingLuaString:
    """A string found in lua code, waiting to be stored in _dictionary.

    luatype selects the escaping rules applied by addline() and store():
    'luastr2' gets its \\' sequences unescaped, 'luastr3' is handled as a
    long-bracket string, and 'lua_plural' does not take its first line from
    the constructor.
    """

    def __init__(self, lineno, luatype, luastring, ismultiline,
                 istranslatable, numequals=0, plural=None):
        self.lineno = lineno
        self.luatype = luatype
        self.luastring = ''
        self.ismultiline = ismultiline
        self.istranslatable = istranslatable
        self.numequals = numequals
        if luatype != 'lua_plural':
            self.addline(luastring, True)
        self.plural = plural

    def addline(self, value, isfirstline=False):
        """Set (first line) or append (following lines) a line of text."""
        if self.luatype == 'luastr3':
            if isfirstline and value == "":
                # This should be handled by not adding (self.string + '\n')
                # on the next call, but someone can implement that if they
                # start using long-bracket strings.
                raise NotImplementedError("Not implemented: handling of long-bracket strings that start with a newline.")
            # Long-bracket text is taken literally: double every backslash.
            value = value.replace('\\', r'\\')
        self.luastring = (value if isfirstline
                          else self.luastring + '\n' + value)

    # this function is used by store, when translating lua pending plural into
    # PoCommentedString.plural
    def storePlural(self):
        """Return the converted plural, or None when there is no plural."""
        return None if self.plural is None else self.plural.convert()

    def store(self):
        """Add the string to _dictionary when it is translatable.

        Nothing is stored when the current domain is filtered out or when
        the sentence is empty. Unless the domain check fails (which clears
        the pending infos itself), the pending translator infos are cleared
        at the end, even if the string is not translatable.
        """
        global _pending_cinfo
        global _linenosub
        if not checkdomain(self.lineno):
            return
        if self.istranslatable:
            _linenosub += 1
            finfo = pywmlx.nodemanip.fileref + ":" + str(self.lineno)
            fileno = pywmlx.nodemanip.fileno
            # checksentence() returns 1 for an empty sentence: only a
            # non-empty sentence is added to the dictionary.
            if checksentence(self.luastring, finfo, islua=True) != 1:
                if self.luatype == 'luastr3':
                    self.luastring = self.luastring.replace('"', r'\"')
                else:
                    if self.luatype == 'luastr2':
                        self.luastring = re.sub(r"\\\'", r"'",
                                                self.luastring)
                    self.luastring = re.sub(r'(?<!\\)"', r'\"',
                                            self.luastring)
                # A "# po-override" comment wins over the lua function name.
                override = _pending_cinfo["po-override"]
                loc_wmlinfos = []
                if override is not None:
                    loc_wmlinfos.append(override)
                elif _pending_luafuncname is not None:
                    loc_wmlinfos.append('[lua]: ' + _pending_luafuncname)
                loc_addedinfos = _pending_cinfo["po"]
                if loc_addedinfos is None:
                    loc_addedinfos = []
                if _currentdomain not in _dictionary:
                    _dictionary[_currentdomain] = dict()
                known = _dictionary[_currentdomain].get(self.luastring)
                sentence = PoCommentedString(
                    self.luastring,
                    _currentdomain,
                    orderid=(fileno, self.lineno, _linenosub),
                    ismultiline=self.ismultiline,
                    wmlinfos=loc_wmlinfos, finfos=[finfo],
                    addedinfos=loc_addedinfos,
                    plural=self.storePlural())
                if known is None:
                    _dictionary[_currentdomain][self.luastring] = sentence
                else:
                    known.update_with_commented_string(sentence)

        # finally PendingLuaString.store() will clear pendinginfos
        # in any case (even if the pending string is not translatable)
        after_pending_info(self.lineno, not self.istranslatable)


class PendingWmlString:
    """A string found in WML code, waiting to be handed to the node tree."""

    def __init__(self, lineno, wmlstring, ismultiline, istranslatable, israw):
        """Record the first line of the string, doubling its backslashes.

        israw indicates a << >> delimited string.
        """
        self.lineno = lineno
        self.wmlstring = wmlstring.replace('\\', r'\\')
        self.ismultiline = ismultiline
        self.istranslatable = istranslatable
        self.israw = israw

    def addline(self, value):
        """Append one more line, doubling its backslashes."""
        escaped = value.replace('\\', r'\\')
        self.wmlstring = '\n'.join((self.wmlstring, escaped))

    def store(self):
        """Consume the pending wmlinfo type and store the sentence.

        A single-line, non-translatable string is first used as the value
        of the pending wmlinfo (if any). Then, when the current domain is
        accepted and the string is translatable and not empty, it is added
        to the current node. Unless the domain check fails (which clears
        the pending infos itself), the pending translator infos are cleared
        at the end.
        """
        global _linenosub
        global _pending_cinfo
        global _pending_winfotype
        if _pending_winfotype is not None:
            if self.ismultiline is False and self.istranslatable is False:
                pywmlx.nodemanip.addWmlInfo(
                    _pending_winfotype + '=' + self.wmlstring)
            _pending_winfotype = None
        if not checkdomain(self.lineno):
            return
        if self.istranslatable:
            finfo = pywmlx.nodemanip.fileref + ":" + str(self.lineno)
            # checksentence() returns 1 for an empty sentence: only a
            # non-empty sentence is added.
            if checksentence(self.wmlstring, finfo, islua=False) != 1:
                _linenosub += 1
                # raw strings escape every quote; regular strings turn the
                # doubled quote "" into one escaped quote
                quote_pattern = '"' if self.israw else '""'
                self.wmlstring = re.sub(quote_pattern, r'\"', self.wmlstring)
                pywmlx.nodemanip.addNodeSentence(
                    self.wmlstring,
                    domain=_currentdomain,
                    ismultiline=self.ismultiline,
                    lineno=self.lineno,
                    lineno_sub=_linenosub,
                    override=_pending_cinfo["po-override"],
                    addition=_pending_cinfo["po"])
        after_pending_info(self.lineno, not self.istranslatable)


def addstate(name, value):
    """Register a state under its lower-cased name.

    The _states dictionary is created on first use.
    """
    global _states
    if _states is None:
        _states = dict()
    state_key = name.lower()
    _states[state_key] = value


def setup(dictionary, initialdomain, domains, wall, fdebug):
    """Initialise the module globals and register the lua and WML states.

    dictionary    -- dictionary that will receive the sentences found.
    initialdomain -- domain assumed at the start of every file.
    domains       -- iterable of domains to keep, or None to leave the
                     domain filter untouched (no filtering by default).
    wall          -- value stored in _warnall.
    fdebug        -- debug output file, or None to disable debug mode.
    """
    global _dictionary, _initialdomain, _domains
    global _warnall, _debugmode, _fdebug
    _dictionary = dictionary
    _initialdomain = initialdomain
    if domains is not None:
        _domains = set(domains)
    _warnall = wall
    _fdebug = fdebug
    # debug mode is on exactly when a debug file was supplied
    _debugmode = fdebug is not None
    setup_luastates()
    setup_wmlstates()


def run(*, filebuf, fileref, fileno, startstate, waitwml=True):
    """Run the state machine over every line of one file.

    filebuf    -- iterable yielding the lines of the file.
    fileref    -- file reference, passed to pywmlx.nodemanip.newfile().
    fileno     -- file number, passed to pywmlx.nodemanip.newfile().
    startstate -- key (in _states) of the state the machine starts in.
    waitwml    -- value stored in the module-level _waitwml flag.

    Per-file globals (_current_lineno, _linenosub, _currentdomain,
    _pending_luafuncname, _on_luatag) are reset first. Each line is then
    handed to the current state until a state consumes it (returns None as
    the new line). Finally pywmlx.nodemanip.closefile() is called.

    A UnicodeDecodeError raised while reading filebuf is reported through
    wmlerr() together with the line that contains the invalid byte. The one
    exception is the file "test_cve_2018_1999023_2.cfg": for it the
    function returns silently, without calling closefile().
    """
    global _states
    global _current_lineno
    global _linenosub
    global _waitwml
    global _currentdomain
    global _dictionary
    global _pending_luafuncname
    global _on_luatag
    _pending_luafuncname = None
    _on_luatag = False
    # cs is the "current state" object; cs_debug is its name (used only in
    # the debug output)
    cs = _states.get(startstate)
    cs_debug = startstate
    _current_lineno = 0
    _linenosub = 0
    _waitwml = waitwml
    _currentdomain = _initialdomain
    pywmlx.nodemanip.newfile(fileref, fileno)
    try:
        for xline in filebuf:
            xline = xline.strip('\n\r')
            _current_lineno += 1
            # on every new line, the debug file gets another marker
            if _debugmode:
                print('@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@',
                      file=_fdebug)
            # keep feeding the (remaining) line to states until one of them
            # consumes it completely, i.e. returns None as the new xline
            while xline is not None:
                # print debug infos (if debugmode is on)
                if _debugmode:
                    lno = '%05d' % _current_lineno
                    print('---------------------------------------------------',
                          file=_fdebug)
                    print('LINE', lno, '|', xline, file=_fdebug)
                # action number is used to know what function we should run
                action = 0
                v = None
                m = None
                if cs.regex is None:
                    # a state without regex always runs
                    # action = 1 --> execute state.run
                    action = 1
                    if _debugmode:
                        print('ALWAYS-RUN x', cs_debug, file=_fdebug)
                else:
                    # m is the match object (None if the regex fails)
                    m = re.match(cs.regex, xline)
                    if m:
                        # action = 1 --> execute state.run
                        action = 1
                        if _debugmode:
                            print('RUN state  \\', cs_debug, file=_fdebug)
                    else:
                        # action = 2 --> change to the state pointed by
                        #                state.iffail
                        action = 2
                        if _debugmode:
                            print('FAIL state |', cs_debug, file=_fdebug)
                if action == 1:
                    # xline, ns: xline --> override xline with new value
                    #            ns --> name of the next state
                    xline, ns = cs.run(xline, _current_lineno, m)
                    cs_debug = ns
                    cs = _states.get(ns)
                else:
                    # the line is left untouched and retried with the
                    # fallback state
                    cs_debug = cs.iffail
                    cs = _states.get(cs.iffail)
            # end while xline
        # end for xline
    except UnicodeDecodeError as e:
        if "test_cve_2018_1999023_2.cfg" in pywmlx.nodemanip.fileref:
            # This unit test is allowed to contain invalid UTF-8. Ignore it.
            return
        errpos = int(e.start)  # error position on file object with UTF-8 error
        errbval = hex(e.object[errpos]) # value of the byte which causes the UTF-8 error
        # NOTE(review): the original comment states that _current_lineno
        # is still 0 when the exception occurs; either way it is not used
        # below.
        # The line of the file where the problem happened is recomputed
        # from the data that precedes the invalid byte.
        # In order to perform this task (and not only this one) we create a
        # temporary string which contains all the file text UNTIL the
        # UTF-8 error
        untilerr_buf = e.object[0:errpos] # buffer containing file text
        # each byte is mapped to the character with the same code point
        untilerr = "".join(map(chr, untilerr_buf))
        # splituntil will be an array of strings (each item is a line of
        # text). The last item will show the point where the invalid UTF-8
        # character was found.
        splituntil = untilerr.split('\n')
        # the error line number is equal to the number of lines of text
        # until the error occurs (number of items in the splituntil array)
        errlineno = len(splituntil)
        # finally we can know the actual file info
        finfo = pywmlx.nodemanip.fileref + ":" + str(errlineno)
        errmsg = (
            "UTF-8 Format error.\nCan't decode byte " + str(errbval) + ' (' +
            e.reason + ').\n' +
            'Probably your file is not encoded with UTF-8 encoding: you ' +
            'should open the file with an advanced text editor, and re-save ' +
            'it with UTF-8 encoding.\n' +
            'To avoid this problem in the future, you might want to set ' +
            'the default encoding of your editor to UTF-8.\n\n' +
            'Text preceding the invalid byte (source file, line ' +
            str(errlineno) + '):\n' + splituntil[-1] + '\n'
        )
        wmlerr(finfo, errmsg)
    pywmlx.nodemanip.closefile(_dictionary, _current_lineno)