wesnoth/utils/pofix.py
Steven Panek 0b057a9864 Made it so that pofix converts makeshift dashes to real em dashes;...
...made it convert hyphen-minuses that are being used as minus signs
to the Unicode minus sign; 'aint' -> 'ain't' in EI.
2010-05-12 22:22:02 +00:00

293 lines
7.9 KiB
Python
Executable File

#!/usr/bin/env python
# pofix - perform string fixups on incoming .po files.
#
# The purpose of this script is to save translators from having to
# apply various string fixes needed before stable release by hand. It is
# intended to be run on each incoming .po file as the Lord of
# Translations receives it. However, translators may run it on their
# own .po files to be sure, as a second application will harmlessly do
# nothing.
#
# To use this script, give it one or more paths to .po files as
# command-line arguments. Each file will be tweaked as needed.
# It should work on Windows and MacOS X as well as Linux, provided
# you have Python installed.
#
# This script will emit a report line for each file it modifies,
# and save a backup copy of the original with extension "-bak".
#
# This script will tell you when it is obsolete. Run it against all .po
# files in the main Wesnoth tree; when it says none are older than this script,
# it can be discarded (assuming that it has in fact been used to transform
# all incoming .po files in the meantime).
#
# Example usage:
# utils/pofix.py po/wesnoth*/*.po*
# find data/campaigns/ -name '*.cfg' -print0 | xargs -0 utils/pofix.py
#
# Any lines in the structure below that are marked with "#*" imply changes
# of meaning that may require a change in translation.
# Replacements shared by nearly every text domain: turn makeshift dashes
# (runs of hyphens, or a hyphen next to a space) into a real em dash.
# "\xe2\x80\x94" is the UTF-8 byte sequence for U+2014 EM DASH.
# The pairs are applied in the order listed, so the longer patterns must
# stay ahead of the shorter ones they contain.
_dashfixes = (
    ("- - -", "\xe2\x80\x94"),
    ("- -", "\xe2\x80\x94"),
    ("---", "\xe2\x80\x94"),
    ("--", "\xe2\x80\x94"),
    (" -", "\xe2\x80\x94"),
    ("- ", "\xe2\x80\x94 "),
)

# Maps a text domain name to a tuple of (old, new) string pairs.
stringfixes = {
    "wesnoth" : _dashfixes + (
        # Convert hyphen-minuses that are being used as minus signs
        # to the Unicode minus sign ("\xe2\x88\x92" is UTF-8 for U+2212)
        ("-25", "\xe2\x88\x9225"),
        ("-1", "\xe2\x88\x921"),
    ),
    "wesnoth-aoi" : _dashfixes,
    "wesnoth-did" : _dashfixes,
    "wesnoth-dm" : _dashfixes,
    "wesnoth-dw" : _dashfixes,
    "wesnoth-ei" : _dashfixes + (
        # aint -> ain't
        ("aint", "ain't"),
    ),
    "wesnoth-httt" : _dashfixes,
    "wesnoth-low" : _dashfixes,
    # No fixes for the manual.
    "wesnoth-manual" : (
    ),
    "wesnoth-nr" : _dashfixes,
    "wesnoth-thot" : _dashfixes,
    "wesnoth-trow" : _dashfixes,
    "wesnoth-tsg" : _dashfixes,
    "wesnoth-sof" : _dashfixes,
    "wesnoth-sotbe" : _dashfixes,
    "wesnoth-tb" : _dashfixes,
    "wesnoth-tutorial" : _dashfixes,
    "wesnoth-units" : _dashfixes,
    "wesnoth-utbs" : _dashfixes,
    "1.8-announcement" : (
        ("WML events an AI components", "WML events and AI components"),
        ("1.7.3", "1.7.13"),
        ("/tags/1.8/", "/tags/1.8.0/"),
    ),
}
# Cutoff for the obsolescence report: the main loop counts the argument
# files whose modification time is later than this timestamp, and when more
# than one file was processed and every one of them is newer, it reports
# that this script may be obsolete.
# Try to use UTC here. The value and its trailing annotation can be
# produced with, for example:
# date --utc "+%s # %c"
timecheck = 1262364535 # Fri 01 Jan 2010 04:48:55 PM UTC
import os, sys, time, stat, re
if __name__ == '__main__':
    # The fix table stores UTF-8 text as Python 2 byte strings. Under
    # Python 3 those literals would be misread as individual characters
    # and the rewritten files would be corrupted, so refuse to run there.
    if sys.version_info[0] > 2:
        sys.stderr.write("pofix: this script must be run with Python 2\n")
        sys.exit(1)
    newer = 0       # files modified more recently than timecheck
    modified = 0    # files this run actually rewrote
    pocount = 0     # files with a recognised extension that were attempted
    for path in sys.argv[1:]:
        # Only .po, .pot and .cfg files are processed; skip anything else.
        if not path.endswith(".po") and not path.endswith(".pot") and not path.endswith(".cfg"):
            continue
        try:
            pocount += 1
            # Notice how many files are newer than the time check
            statinfo = os.stat(path)
            if statinfo.st_mtime > timecheck:
                newer += 1
            # Read the content of each file and transform it
            ifp = open(path, "r")
            try:
                before = ifp.read()
            finally:
                ifp.close()
            after = before
            # Copy of the text with everything from a '#' to the end of
            # its line removed; used only for the duplicate check below.
            decommented = re.sub("#.*", "", before)
            # Every domain's fixes are applied to every file; the domain
            # name itself is not consulted.
            for fixes in stringfixes.values():
                for (old, new) in fixes:
                    # Compare by value: identity ("is") only matches equal
                    # strings when they happen to be the same object.
                    if old == new:
                        # complain loudly
                        print("pofix: old string\n\t\"%s\"\n equals new string\n\t\"%s\"\nexiting." % (old, new))
                        sys.exit(1)
                    # This check is imperfect. The last clause prevents
                    # false positives when new is a substring of old, at
                    # the price of missing some real problems. The proper
                    # check would be "does replacing old with new lead to
                    # duplicate msgids (including old ones marked with
                    # #~)?", which is not easily done in this design.
                    elif new in decommented and old in decommented and new not in old:
                        print("pofix: %s already includes the new string\n\t\"%s\"\nbut also the old\n\t\"%s\"\nthis needs handfixing for now since it likely creates duplicate msgids." % (path, new, old))
                    else:
                        # Replace on every non-empty line that does not
                        # start with '#'; those lines stay untouched.
                        lines = after.split('\n')
                        for (i, line) in enumerate(lines):
                            if line and line[0] != '#':
                                lines[i] = line.replace(old, new)
                        after = '\n'.join(lines)
            if after != before:
                print("pofix: %s modified" % path)
                modified += 1
                # Save a backup
                os.rename(path, path + "-bak")
                # Write out transformed version
                ofp = open(path, "w")
                try:
                    ofp.write(after)
                finally:
                    ofp.close()
        # os.stat/os.rename raise OSError, but open/read/write raise
        # IOError in Python 2; catch both so that one bad file does not
        # abort the whole run.
        except (OSError, IOError):
            sys.stderr.write("pofix: I can't see %s\n" % path)
    print("pofix: %d files processed, %d files modified, %d files newer" % (pocount, modified, newer))
    if pocount > 1 and newer == pocount:
        print("pofix: script may be obsolete")