wmllint and wmliterator: open files as UTF-8 and use Unicode literals

This is a single commit because modifying only one of the two files would break the other.
2025-05-08 17:26:24 +00:00 · 2015-08-02 22:03:57 +02:00 · 2015-08-02 22:03:57 +02:00 · d91c9f1fba
commit d91c9f1fba
parent ab88b9091f
2 changed files with 31 additions and 28 deletions
--- a/data/tools/wesnoth/wmliterator.py
+++ b/data/tools/wesnoth/wmliterator.py
@ -20,7 +20,9 @@ Limitations:
 enough for now.
 """

-import sys, re, copy
+from __future__ import unicode_literals
+
+import sys, re, copy, codecs
 keyPattern = re.compile('(\w+)(,\s?\w+)*\s*=')
 keySplit = re.compile(r'[=,\s]')
 tagPattern = re.compile(r'(^|(?<![\w|}]))(\[/?\+?[a-z _]+\])')
@ -125,9 +127,8 @@ Important Attributes:
            lines = []
            if filename:
                try:
-                    ifp = open(self.fname)
-                    lines = ifp.readlines()
-                    ifp.close()
+                    with codecs.open(self.fname, "r", "utf8") as ifp:
+                        lines = ifp.readlines()
                except Exception:
                    self.printError('error opening file')
        self.lines = lines
@ -478,11 +479,10 @@ if __name__ == '__main__':
            continue
        print 'Reading', fname+'...'
        didSomething = True
-        f = open(fname)
-        itor = WmlIterator(f.readlines())
-        for i in itor:
-            pass
-        f.close()
+        with codecs.open(fname, "r", "utf8") as f:
+            itor = WmlIterator(f.readlines())
+            for i in itor:
+                pass
        print itor.lineno + itor.span, 'lines read.'
    if not didSomething:
        print 'That is not a valid .cfg file'
--- a/data/tools/wmllint
+++ b/data/tools/wmllint
@ -181,9 +181,9 @@
 # code.
 #

-from __future__ import print_function
+from __future__ import print_function, unicode_literals

-import sys, os, re, getopt, string, copy, difflib, time, gzip
+import sys, os, re, getopt, string, copy, difflib, time, gzip, codecs
 from wesnoth.wmltools import *
 from wesnoth.wmliterator import *

@ -2175,9 +2175,11 @@ def translator(filename, mapxforms, textxform):
    global tagstack
    gzipped = filename.endswith(".gz")
    if gzipped:
-        unmodified = gzip.open(filename).readlines()
+        with gzip.open(filename) as content:
+            unmodified = content.readlines()
    else:
-        unmodified = file(filename).readlines()
+        with codecs.open(filename, "r", "utf8") as content:
+            unmodified = content.readlines()
    # Pull file into an array of lines, CR-stripping as needed
    mfile = []
    map_only = filename.endswith(".map")
@ -2433,16 +2435,16 @@ def inner_spellcheck(nav, value, spelldict):
        ("@", " "),
        (")", " "),
        ("(", " "),
-        ("\xe2\x80\xa6", " "),  # UTF-8 ellipsis
-        ("\xe2\x80\x94", " "),  # UTF-8 em dash
-        ("\xe2\x80\x93", " "),  # UTF-8 en dash
-        ("\xe2\x80\x95", " "),  # UTF-8 horizontal dash
-        ("\xe2\x88\x92", " "),  # UTF-8 minus sign
-        ("\xe2\x80\x99", "'"),  # UTF-8 right single quote
-        ("\xe2\x80\x98", "'"),  # UTF-8 left single quote
-        ("\xe2\x80\x9d", " "),  # UTF-8 right double quote
-        ("\xe2\x80\x9c", " "),  # UTF-8 left double quote
-        ("\xe2\x80\xa2", " "),  # UTF-8 bullet
+        ("…", " "),  # UTF-8 ellipsis
+        ("—", " "),  # UTF-8 em dash
+        ("–", " "),  # UTF-8 en dash
+        ("―", " "),  # UTF-8 horizontal dash
+        ("−", " "),  # UTF-8 minus sign
+        ("’", "'"),  # UTF-8 right single quote
+        ("‘", "'"),  # UTF-8 left single quote
+        ("”", " "),  # UTF-8 right double quote
+        ("“", " "),  # UTF-8 left double quote
+        ("•", " "),  # UTF-8 bullet
        ("◦", ""),              # Why is this necessary?
        ("''", ""),
        ("female^", " "),
@ -2913,10 +2915,11 @@ In your case, your system interprets your arguments as:
                    if os.path.exists(backup):
                        fromdate = time.ctime(os.stat(backup).st_mtime)
                        todate = time.ctime(os.stat(fn).st_mtime)
-                        fromlines = open(backup, 'U').readlines()
-                        tolines = open(fn, 'U').readlines()
-                        diff = difflib.unified_diff(fromlines, tolines,
-                                             backup, fn, fromdate, todate, n=3)
+                        with codecs.open(backup, "r", "utf8") as fromlines, \
+                             codecs.open(fn, "r", "utf8") as tolines:
+                            diff = difflib.unified_diff(fromlines.readlines(),
+                                                        tolines.readlines(),
+                                                        backup, fn, fromdate, todate, n=3)
                        sys.stdout.writelines(diff)
                else:
                    if "~" in fn:
@ -2935,7 +2938,7 @@ In your case, your system interprets your arguments as:
                                    with gzip.open(fn, "w") as ofp:
                                        ofp.write(changed)
                                else:
-                                    with open(fn, "w") as ofp:
+                                    with codecs.open(fn, "w", "utf8") as ofp:
                                        ofp.write(changed)
                    #except maptransform_error, e:
                    #    print("wmllint: " + `e`, file=sys.stderr)