#!/usr/bin/perl -w # FIXME: # - maybe restrict "ability" matching to unit defs (not yet necessary) use strict; use File::Basename; use POSIX qw(strftime); use Getopt::Long; # use utf8 for stdout and file reading use utf8; use open IO => ':utf8'; binmode(STDOUT, ':utf8'); our $toplevel = '.'; our $initialdomain = 'wesnoth'; our $domain = undef; GetOptions ('directory=s' => \$toplevel, 'initialdomain=s' => \$initialdomain, 'domain=s' => \$domain); $domain = $initialdomain unless defined $domain; sub wml2postring { my $str = shift; $str =~ s/\"\"/\\\"/mg; # "" -> \" $str =~ s/^(.*)$/"$1\\n"/mg; # Surround with doublequotes and add final escaped newline $str =~ s/\n$/\n"\\n"/mg; # Split the string around newlines: "foo\nbar" -> "foo\n" LF "bar" $str =~ s/\\n\"$/\"\n/g; # Remove final escaped newline and add newline after last line return $str; } sub lua2postring { my $str = shift; my $quote = shift; if ($quote eq "'") { $str =~ s/"/\\"/g; } $str =~ s/^(.*)$/"$1\\n"/mg; # Surround with doublequotes and add final escaped newline $str =~ s/\n$/\n"\\n"/mg; # Split the string around newlines: "foo\nbar" -> "foo\n" LF "bar" $str =~ s/\\n\"$/\"\n/g; # Remove final escaped newline and add newline after last line return $str; } ## extract strings with their refs and node info into %messages sub possible_node_info { my ($nodeinfostack, $field, $value) = @_; if ($field =~ m/\b(speaker|id|role|description|condition|type|race)\b/) { push @{$nodeinfostack->[-1][2]}, "$field=$value"; } } sub read_wml_file { my ($file) = @_; our (%messages,%nodeinfo); my ($str, $translatable, $line); my $readingattack = 0; my @nodeinfostack = (["top", [], [], []]); # dummy top node my @domainstack = ($initialdomain); my $valid_wml = 1; open (FILE, "<$file") or die "cannot read from $file"; LINE: while () { # record a #define scope if (m/\#define/) { unshift @domainstack, $domainstack[0]; next LINE; } elsif (m/^\s*\#enddef/) { shift @domainstack; } # change the current textdomain when hitting the directive if (m/\#textdomain\s+(\S+)/) { $domainstack[0] = $1; next LINE; } if (m/\#\s*po:\s+(.+)/) { push @{$nodeinfostack[-1][3]}, $1; } # skip other # lines as comments next LINE if m/^\s*\#/ and !defined $str and not m/#\s*wmlxgettext/; if (!defined $str and m/^(?:[^\"]*?)((?:_\s*)?)\"([^\"]*(?:\"\"[^\"]*)*)\"(.*)/) { # single-line quoted string $translatable = ($1 ne ''); my $rest = $3; # if translatable and in the requested domain if ($translatable and $domainstack[0] eq $domain) { my $msg = wml2postring($2); push @{$messages{$msg}}, "$file:$."; push @{$nodeinfostack[-1][1]}, $msg if $valid_wml; } elsif (not $translatable and m/(\S+)\s*=\s*\"([^\"]*)\"/) { # may be a piece of node info to extract possible_node_info(\@nodeinfostack, $1, $2) if $valid_wml; } # process remaining of the line $_ = $rest . "\n"; redo LINE; } elsif (!defined $str and m/^(?:[^\"]*?)((?:_\s*)?)\s*\"([^\"]*(?:\"\"[^\"]*)*)/) { # start of multi-line $translatable = ($1 ne ''); $_ = $2; if (m/(.*)\r/) { $_ = "$1\n"; } $str = $_; $line = $.; } elsif (m/([^\"]*(?:\"\"[^\"]*)*)\"(.*)/) { # end of multi-line die "end of string without a start in $file" if !defined $str; $str .= $1; if ($translatable and $domainstack[0] eq $domain) { my $msg = "\"\"\n" . wml2postring($str); push @{$messages{$msg}}, "$file:$line" ; push @{$nodeinfostack[-1][1]}, $msg if $valid_wml; } $str = undef; # process remaining of the line $_ = $2 . "\n"; redo LINE; } elsif (defined $str) { # part of multi-line if (m/(.*)\r/) { $_ = "$1\n"; } $str .= $_; } elsif (m/(\S+)\s*=\s*(.*?)\s*$/) { # single-line non-quoted string die "nested string in $file" if defined $str; possible_node_info(\@nodeinfostack, $1, $2) if $valid_wml; ### probably not needed ### # # magic handling of weapon descriptions # push @{$messages{wml2postring($2)}}, "$file:$." # if $readingattack and # ($1 eq 'name' or $1 eq 'type' or $1 eq 'special'); # # # magic handling of unit abilities # push @{$messages{wml2postring($2)}}, "$file:$." # if $1 eq 'ability'; } elsif (m,\[attack\],) { $readingattack = 1; } elsif (m,\[/attack\],) { $readingattack = 0; } # check for node opening/closing to handle message metadata next LINE if not $valid_wml; next LINE if defined $str; # skip lookup if inside multi-line next LINE if m/^ *\{.*\} *$/; # skip lookup if a statement line next LINE if m/=/; # skip lookup if a field line while (m,\[ *([a-z/+].*?) *\],g) { my $nodename = $1; #my $ind = " " x (@nodeinfostack + ($nodename =~ m,/, ? 0 : 1)); if ($nodename =~ s,/ *,,) { # closing node if (@nodeinfostack == 0) { warn "empty node stack on closed node at $file:$."; $valid_wml = 0; last; } my ($openname, $messages, $metadata, $comments) = @{pop @nodeinfostack}; if ($nodename ne $openname) { warn "expected closed node \'$openname\' ". "got \'$nodename\' at $file:$."; $valid_wml = 0; last; } # some nodes should inherit parent metadata if ($nodename =~ m/option/) { $metadata = $nodeinfostack[-1][2]; } #print STDERR "$ind<<< $.: $nodename\n"; #print STDERR "==> $file:$.: $nodename: @{$metadata}\n" if @{$messages}; for my $msg (@{$messages}) { push @{$nodeinfo{$msg}}, [$nodename, $metadata, $comments]; } } else { # opening node #print STDERR "$ind>>> $.: $nodename\n"; $nodename =~ s/\+//; push @nodeinfostack, [$nodename, [], [], []]; } } # do not add anything here, beware of the next's before the loop } pop @nodeinfostack if @nodeinfostack; # dummy top node if (@nodeinfostack) { warn "non-empty node stack at end of $file"; $valid_wml = 0; } close FILE; if (not $valid_wml) { warn "WML seems invalid for $file, node info extraction forfeited ". "past the error point"; } } sub read_lua_file { my ($file) = @_; our (%messages,%nodeinfo); my ($str, $translatable, $line, $quote); my $curdomain = $initialdomain; my $metadata = undef; open (FILE, "<$file") or die "cannot read from $file"; LINE: while () { if (defined $metadata and m/--\s*po:\s*(.*)$/) { push @{@{$metadata}[2]}, $1; } # skip comments unless told not to next LINE if m/^\s*--/ and not defined $str and not m/--\s*wmlxgettext/; # change the textdomain if (m/textdomain\s*\(?\s*"([^"]+)"(.*)/) { $curdomain = $1; # process rest of the line $_ = $2 . "\n"; redo LINE; } # Single-line quoted string # TODO: support [[long strings]] # TODO: get 'singlequotes' to work if (not defined $str and m/^(?:[^"]*?)((?:_\s*)?)\s*(")([^"]*(?:\\"[^"]*)*)"(.*)/) { # $1 = underscore or not # $2 = quote (double or single) # $3 = string # $4 = rest my $translatable = ($1 ne ''); my $rest = $4; if ($translatable and $curdomain eq $domain) { my $msg = lua2postring($3, $2); push @{$messages{$msg}}, "$file:$."; push @{$nodeinfo{$msg}}, $metadata if defined $metadata; } # process rest of the line $_ = $rest . "\n"; redo LINE; } # Begin multiline quoted string elsif (not defined $str and m/^(?:[^"]*?)((?:_\s*)?)\s*(")([^"]*(?:\\"[^"]*)*)/) { # $1 = underscore or not # $2 = quote (double or single) # $3 = string $translatable = ($1 ne ''); $quote = $2; $str = $3; $line = $.; } # End multiline quoted string elsif (m/([^"]*(?:\\"[^"]*)*)(")(.*)/) { # $1 = string # $2 = quote (double or single) # $3 = rest die "end of string without a start in $file" if not defined $str; $str .= $1; my $rest = $3; # TODO: ensure $quote eq $2 when this matches if ($translatable and $curdomain eq $domain) { my $msg = "\"\"\n" . lua2postring($str, $quote); push @{$messages{$msg}}, "$file:$line"; push @{$nodeinfo{$msg}}, $metadata if defined $metadata; } $str = undef; $translatable = undef; $line = undef; $quote = undef; # process rest of the line $_ = $rest . "\n"; redo LINE; } # Middle of multiline quoted string elsif (defined $str) { $str .= $_; } # Function or WML tag elsif (m/function\s+([a-zA-Z0-9_.]+)\s*\(/ or m/([a-zA-Z0-9_.]+)\s*=\s*function/) { $metadata = ["lua", [$1], []]; } } } our (%messages,%nodeinfo); chdir $toplevel; foreach my $file (@ARGV) { if ($file =~ m/\.lua$/i) { read_lua_file($file); } else { read_wml_file($file); } } ## index strings by their location in the source so we can sort them our @revmessages; foreach my $key (keys %messages) { foreach my $line (@{$messages{$key}}) { my ($file, $lineno) = split /:/, $line; push @revmessages, [ $file, $lineno, $key ]; } } # sort them @revmessages = sort { $a->[0] cmp $b->[0] or $a->[1] <=> $b->[1] } @revmessages; ## output my $date = strftime "%F %R%z", localtime(); print <\\n" "Language-Team: LANGUAGE \\n" "MIME-Version: 1.0\\n" "Content-Type: text/plain; charset=UTF-8\\n" "Content-Transfer-Encoding: 8bit\\n" EOH ; foreach my $occurence (@revmessages) { my $key = $occurence->[2]; if (defined $messages{$key}) { if (defined $nodeinfo{$key}) { my %added; for my $info (@{$nodeinfo{$key}}) { my ($name, $data, $comments) = @{$info}; my $desc = join(", ", @{$data}); my $nodestr = $desc ? "[$name]: $desc" : "[$name]"; # Add only unique node info strings. if (not defined $added{$nodestr}) { $added{$nodestr} = 1; printf "#. %s\n", $nodestr; } foreach my $comment (@{$comments}) { printf "#. $comment\n"; } } } printf "#: %s\n", join(" ", @{$messages{$key}}); print "msgid $key"; print "msgstr \"\"\n\n"; # be sure we don't output the same message twice delete $messages{$key}; } }