wmllint and wmliterator: open files as UTF-8 and use Unicode literals

This is a single commit because modifying only one of the two files broke the other.
This commit is contained in:
Elvish_Hunter 2015-08-02 22:03:57 +02:00
parent ab88b9091f
commit d91c9f1fba
2 changed files with 31 additions and 28 deletions

View File

@ -20,7 +20,9 @@ Limitations:
enough for now.
"""
import sys, re, copy
from __future__ import unicode_literals
import sys, re, copy, codecs
keyPattern = re.compile('(\w+)(,\s?\w+)*\s*=')
keySplit = re.compile(r'[=,\s]')
tagPattern = re.compile(r'(^|(?<![\w|}]))(\[/?\+?[a-z _]+\])')
@ -125,9 +127,8 @@ Important Attributes:
lines = []
if filename:
try:
ifp = open(self.fname)
with codecs.open(self.fname, "r", "utf8") as ifp:
lines = ifp.readlines()
ifp.close()
except Exception:
self.printError('error opening file')
self.lines = lines
@ -478,11 +479,10 @@ if __name__ == '__main__':
continue
print 'Reading', fname+'...'
didSomething = True
f = open(fname)
with codecs.open(fname, "r", "utf8") as f:
itor = WmlIterator(f.readlines())
for i in itor:
pass
f.close()
print itor.lineno + itor.span, 'lines read.'
if not didSomething:
print 'That is not a valid .cfg file'

View File

@ -181,9 +181,9 @@
# code.
#
from __future__ import print_function
from __future__ import print_function, unicode_literals
import sys, os, re, getopt, string, copy, difflib, time, gzip
import sys, os, re, getopt, string, copy, difflib, time, gzip, codecs
from wesnoth.wmltools import *
from wesnoth.wmliterator import *
@ -2175,9 +2175,11 @@ def translator(filename, mapxforms, textxform):
global tagstack
gzipped = filename.endswith(".gz")
if gzipped:
unmodified = gzip.open(filename).readlines()
with gzip.open(filename) as content:
unmodified = content.readlines()
else:
unmodified = file(filename).readlines()
with codecs.open(filename, "r", "utf8") as content:
unmodified = content.readlines()
# Pull file into an array of lines, CR-stripping as needed
mfile = []
map_only = filename.endswith(".map")
@ -2433,16 +2435,16 @@ def inner_spellcheck(nav, value, spelldict):
("@", " "),
(")", " "),
("(", " "),
("\xe2\x80\xa6", " "), # UTF-8 ellipsis
("\xe2\x80\x94", " "), # UTF-8 em dash
("\xe2\x80\x93", " "), # UTF-8 en dash
("\xe2\x80\x95", " "), # UTF-8 horizontal dash
("\xe2\x88\x92", " "), # UTF-8 minus sign
("\xe2\x80\x99", "'"), # UTF-8 right single quote
("\xe2\x80\x98", "'"), # UTF-8 left single quote
("\xe2\x80\x9d", " "), # UTF-8 right double quote
("\xe2\x80\x9c", " "), # UTF-8 left double quote
("\xe2\x80\xa2", " "), # UTF-8 bullet
("\u2026", " "), # UTF-8 ellipsis
("\u2014", " "), # UTF-8 em dash
("\u2013", " "), # UTF-8 en dash
("\u2015", " "), # UTF-8 horizontal dash
("\u2212", " "), # UTF-8 minus sign
("\u2019", "'"), # UTF-8 right single quote
("\u2018", "'"), # UTF-8 left single quote
("\u201d", " "), # UTF-8 right double quote
("\u201c", " "), # UTF-8 left double quote
("\u2022", " "), # UTF-8 bullet
("◦", ""), # Why is this necessary?
("''", ""),
("female^", " "),
@ -2913,9 +2915,10 @@ In your case, your system interprets your arguments as:
if os.path.exists(backup):
fromdate = time.ctime(os.stat(backup).st_mtime)
todate = time.ctime(os.stat(fn).st_mtime)
fromlines = open(backup, 'U').readlines()
tolines = open(fn, 'U').readlines()
diff = difflib.unified_diff(fromlines, tolines,
with codecs.open(backup, "r", "utf8") as fromlines, \
codecs.open(fn, "r", "utf8") as tolines:
diff = difflib.unified_diff(fromlines.readlines(),
tolines.readlines(),
backup, fn, fromdate, todate, n=3)
sys.stdout.writelines(diff)
else:
@ -2935,7 +2938,7 @@ In your case, your system interprets your arguments as:
with gzip.open(fn, "w") as ofp:
ofp.write(changed)
else:
with open(fn, "w") as ofp:
with codecs.open(fn, "w", "utf8") as ofp:
ofp.write(changed)
#except maptransform_error, e:
# print("wmllint: " + `e`, file=sys.stderr)