diff options
| author | Daniel Stenberg <daniel@haxx.se> | 2006-04-03 21:11:11 +0000 |
|---|---|---|
| committer | Daniel Stenberg <daniel@haxx.se> | 2006-04-03 21:11:11 +0000 |
| commit | c06e7772ff81ed4bbc78377a6e16456456f3e96c (patch) | |
| tree | 0eee2026f47d5041461d2a35349f0c2175e97ab0 /tools/genlang | |
| parent | a87203651e35f368bf1d8bca5a846a0b9fb657c1 (diff) | |
| download | rockbox-c06e7772ff81ed4bbc78377a6e16456456f3e96c.zip rockbox-c06e7772ff81ed4bbc78377a6e16456456f3e96c.tar.gz rockbox-c06e7772ff81ed4bbc78377a6e16456456f3e96c.tar.bz2 rockbox-c06e7772ff81ed4bbc78377a6e16456456f3e96c.tar.xz | |
langv2
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@9470 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'tools/genlang')
| -rwxr-xr-x | tools/genlang | 611 |
1 files changed, 535 insertions, 76 deletions
diff --git a/tools/genlang b/tools/genlang index cde23f8..07c866a 100755 --- a/tools/genlang +++ b/tools/genlang @@ -1,28 +1,430 @@ #!/usr/bin/perl -s +# __________ __ ___. +# Open \______ \ ____ ____ | | _\_ |__ _______ ___ +# Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / +# Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < +# Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ +# \/ \/ \/ \/ \/ +# $Id$ +# +# Copyright (C) 2006 by Daniel Stenberg +# + +# binary version for the binary lang file +my $langversion = 2; # 2 is the latest one used in the v1 format + +# A note for future users and readers: The original v1 language system allowed +# the build to create and use a different language than english built-in. We +# removed that feature from our build-system, but the build scripts still had +# the ability. But, starting now, this ability is no longer provided since I +# figured it was boring and unnecessary to write support for now since we +# don't use it anymore. if(!$ARGV[0]) { print <<MOO -Usage: genlang [-p=<prefix>] <language file> +Usage: genlang2 [options] <langv2 file> + + -p=<prefix> + Make the tool create a [prefix].c and [prefix].h file. + + -b=<outfile> + Make the tool create a binary language (.lng) file namaed [outfile]. + The use of this option requires that you also use -e. + + -u + Update language file. Given the translated file and the most recent english + file, you\'ll get an updated version sent to stdout. Suitable action to do + when you intend to update a translation. + + -e=<english lang file> + Point out the english (original source) file, to use that as master + language template. Used in combination with -b or -u. -When running this program. <prefix>.h and <prefix>.c will be created in the -"current directory". <prefix> is "lang" by default. + -t=<target> + Specify which target you want the translations/phrases for. Required when + -b or -p is used. + + -o + Voice mode output. Outputs all id: and voice: lines for the given target! + + -v + Enables verbose (debug) output. MOO ; exit; } +# How update works: +# +# 1) scan the english file, keep the whole <phrase> for each phrase. +# 2) read the translated file, for each end of phrase, compare: +# A) all source strings, if there's any change there should be a comment about +# it output +# B) the desc fields +# +# 3) output the phrase with the comments from above +# 4) check which phrases that the translated version didn't have, and spit out +# the english version of those +# + my $prefix = $p; -if(!$prefix) { - $prefix="lang"; +my $binary = $b; +my $update = $u; + +my $english = $e; +my $voiceout = $o; + +my $check = ($binary?1:0) + ($prefix?1:0) + ($update?1:0) + ($voiceout?1:0); + +if($check > 1) { + print "Please use only one of -p, -u, -o and -b\n"; + exit; +} +if(!$check) { + print "Please use at least one of -p, -u, -o and -b\n"; + exit; +} +if(($binary || $update || $voiceout) && !$english) { + print "Please use -e too when you use -b, -o or -u\n"; + exit; +} + +my $target = $t; +if(!$target && !$update) { + print "Please specify a target (with -t)!\n"; + exit; } +my $verbose=$v; + +my %id; # string to num hash +my @idnum; # num to string array + +my %source; # id string to source phrase hash +my %dest; # id string to dest phrase hash +my %voice; # id string to voice phrase hash my $input = $ARGV[0]; -open(HFILE, ">$prefix.h"); -open(CFILE, ">$prefix.c"); +my @m; +my $m="blank"; + +sub match { + my ($string, $pattern)=@_; + + $pattern =~ s/\*/.?*/g; + $pattern =~ s/\?/./g; + + return ($string =~ $pattern); +} + +sub blank { + # nothing to do +} + +my %head; +sub header { + my ($full, $n, $v)=@_; + $head{$n}=$v; +} + +my %phrase; +sub phrase { + my ($full, $n, $v)=@_; + $phrase{$n}=$v; +} + +sub parsetarget { + my ($debug, $strref, $full, $n, $v)=@_; + my $string; + my @all= split(" *, *", $n); + my $test; + for $test (@all) { +# print "TEST ($debug) $target for $test\n"; + if(match($target, $test)) { + $string = $v; +# print "MATCH: $test => $v\n"; + } + } + if($string) { + $$strref = $string; + } + return $string; +} + +my $src; +sub source { + parsetarget("src", \$src, @_); +} + +my $dest; +sub dest { + parsetarget("dest", \$dest, @_); +} + +my $voice; +sub voice { + parsetarget("voice", \$voice, @_); +} + +my %idmap; +my %english; +if($english) { + # For the cases where the english file needs to be scanned/read, we do + # it before we read the translated file. For -b it isn't necessary, but for + # -u it is convenient. + + my $idnum=0; # start with a true number + my $vidnum=0x8000; # first voice id + open(ENG, "<$english") || die "can't open $english"; + my @phrase; + my $id; + while(<ENG>) { + + # get rid of DOS newlines + $_ =~ s/\r//g; + + if($_ =~ /^ *\<phrase\>/) { + # this is the start of a phrase + } + elsif($_ =~ /^ *\<\/phrase\>/) { + # this is the end of a phrase, add it to the english hash + $english{$id}=join("", @phrase); + undef @phrase; + } + elsif($_ ne "\n") { + # gather everything related to this phrase + push @phrase, $_; + } + + if($_ =~ /^ *id: ([^ \t\n]+)/i) { + $id=$1; + # voice-only entries get a difference range + if($id =~ /^VOICE_/) { + # Assign an ID number to this entry + $idmap{$id}=$vidnum; + $vidnum++; + } + else { + # Assign an ID number to this entry + $idmap{$id}=$idnum; + $idnum++; + } + } + } + close(ENG); +} + +# a function that compares the english phrase with the translated one. +# compare source strings and desc + +# Then output the updated version! +sub compare { + my ($idstr, $engref, $locref)=@_; + my ($edesc, $ldesc); + my ($esource, $lsource); + my $mode=0; + + for my $l (@$engref) { + if($l =~ /^ *desc: (.*)/) { + $edesc=$1; + } + elsif($l =~ / *\<source\>/i) { + $mode=1; + } + elsif($mode) { + if($l =~ / *\<\/source\>/i) { + last; + } + $esource .= "$l\n"; + } + } + + my @show; + my @source; + + $mode = 0; + for my $l (@$locref) { + if($l =~ /^ *desc: (.*)/) { + $ldesc=$1; + if($edesc ne $ldesc) { + $l = "### The 'desc' field differs from the english!\n### the previously used desc is commented below:\n### desc: $ldesc\n desc: $edesc\n"; + } + push @show, $l; + } + elsif($l =~ / *\<source\>/i) { + $mode=1; + push @show, $l; + } + elsif($mode) { + if($l =~ / *\<\/source\>/i) { + $mode = 0; + print @show; + if($esource ne $lsource) { + print "### The <source> section differs from the english!\n", + "### the previously used one is commented below:\n"; + for(split("\n", $lsource)) { + print "### $_\n"; + } + print $esource; + } + else { + print $lsource; + } + undef @show; # start over + + push @show, $l; + } + else { + $lsource .= "$l"; + } + } + else { + push @show, $l; + } + } + + + print @show; +} + +my $idcount; # counter for lang ID numbers +my $voiceid=0x8000; # counter for voice-only ID numbers + +# +# Now start the scanning of the selected language string +# + +open(LANG, "<$input"); +my @phrase; +while(<LANG>) { + + $line++; + + # get rid of DOS newlines + $_ =~ s/\r//g; + + if($_ =~ /^( *\#|[ \t\n\r]*\z)/) { + # comment or empty line + next; + } + + my $ll = $_; -print HFILE <<MOO -/* This file was automatically generated using genlang */ + # print "M: $m\n"; + + push @phrase, $ll; + + # this is an XML-lookalike tag + if(/ *<([^>]*)>/) { + my $part = $1; + #print "P: $part\n"; + + if($part =~ /^\//) { + # this was a closing tag + + if($part eq "/phrase") { + # closing the phrase + + my $idstr = $phrase{'id'}; + my $idnum; + + if($dest =~ /^none\z/i) { + # "none" as dest means that this entire phrase is to be + # ignored + #print "dest is NONE!\n"; + } + else { + + # Use the ID name to figure out which id number range we + # should use for this phrase. Voice-only strings are + # separated. + + if($idstr =~ /^VOICE/) { + $idnum = $voiceid++; + } + else { + $idnum = $idcount++; + } + + $id{$idstr} = $idnum; + $idnum[$idnum]=$idstr; + + $source{$idstr}=$src; + $dest{$idstr}=$dest; + $voice{$idstr}=$voice; + + if($verbose) { + print "id: $phrase{id} ($idnum)\n"; + print "source: $src\n"; + print "dest: $dest\n"; + print "voice: $voice\n"; + } + + undef $src; + undef $dest; + undef $voice; + undef %phrase; + } + + if($update) { + my $e = $english{$idstr}; + + if($e) { + # compare original english with this! + my @eng = split("\n", $english{$idstr}); + + compare($idstr, \@eng, \@phrase); + + $english{$idstr}=""; # clear it + } + else { + print "### $idstr: The phrase is not used. Skipped\n"; + } + } + undef @phrase; + + } # end of </phrase> + + # starts with a slash, this _ends_ this section + $m = pop @m; # get back old value, the previous level's tag + next; + } # end of tag close + + # This is an opening (sub) tag + + push @m, $m; # store old value + $m = $1; + next; + } + + if(/^ *([^:]+): *(.*)/) { + my ($name, $val)=($1, $2); + &$m($_, $name, $val); + } +} +close(LANG); + +if($update) { + my $any=0; + for(keys %english) { + if($english{$_}) { + print "###\n", + "### This phrase below was not present in the translated file\n", + "<phrase>\n"; + print $english{$_}; + print "</phrase>\n"; + } + } +} + +if($prefix) { + # We create a .c and .h file + + open(HFILE, ">$prefix.h"); + open(CFILE, ">$prefix.c"); + + print HFILE <<MOO +/* This file was automatically generated using genlang2 */ /* * The str() macro/functions is how to access strings that might be * translated. Use it like str(MACRO) and expect a string to be @@ -37,12 +439,12 @@ extern unsigned char *language_strings[]; extern const unsigned char language_builtin[]; /* The enum below contains all available strings */ -enum { +enum \{ MOO ; -print CFILE <<MOO -/* This file was automaticly generated using genlang, the strings come + print CFILE <<MOO +/* This file was automaticly generated using genlang2, the strings come from "$input" */ #include "$prefix.h" @@ -50,87 +452,144 @@ print CFILE <<MOO unsigned char *language_strings[LANG_LAST_INDEX_IN_ARRAY]; const unsigned char language_builtin[] = MOO - ; +; -open(LANG, "<$input"); -while(<LANG>) { - $line++; - if($_ =~ / *\#/) { - # comment - next; + # Output the ID names for the enum in the header file + my $i; + for $i (1 .. $idcount) { + my $name=$idnum[$i - 1]; # get the ID name + + $name =~ s/\"//g; # cut off the quotes + + printf HFILE (" %s,\n", $name); } - # get rid of DOS newlines - $_ =~ s/\r//g; - if($_ =~ / *([a-z]+): *(.*)/) { - ($var, $value) = ($1, $2); - # print "$var => $value\n"; - $set{$var} = $value; +# Output separation marker for last string ID and the upcoming voice IDs - if( (($var eq "new") && $value && ($value !~ /^\"(.*)\"\W*$/)) || - (($var eq "voice") && $value && ($value !~ /^\"(.*)\"\W*$/)) || - (($var eq "eng") && ($value !~ /^\"(.*)\"\W*$/)) ) { - print "$input:$line:missing quotes for ".$set{'id'}."\n"; - $errors++; - next; - } + print HFILE <<MOO + LANG_LAST_INDEX_IN_ARRAY, /* this is not a string, this is a marker */ + /* --- below this follows voice-only strings --- */ + VOICEONLY_DELIMITER = 0x8000, +MOO + ; - if($var eq "new") { - # the last one for a single phrase +# Output the ID names for the enum in the header file + my $i; + for $i (0x8000 .. ($voiceid-1)) { + my $name=$idnum[$i]; # get the ID name + + $name =~ s/\"//g; # cut off the quotes + + printf HFILE (" %s,\n", $name); + } - if(!$value || ($value eq "\"\"") ) { - # if not set, get the english version - $value = $set{'eng'}; - } -# print "VOICE: ".$set{'voice'}." VALUE: $value\n"; - # Note: if both entries are "", the string is deprecated, - # but must be included to maintain compatibility - if($set{'id'} =~ /^VOICE_/) { - # voice-only - push @vfile, $set{'id'}; - } - else { - push @hfile, $set{'id'}; - $value =~ s/^\"(.*)\"\W*$/\"$1\\0\"/; - print CFILE " $value\n"; - } + # Output end of enum + print HFILE "\n};\n/* end of generated enum list */\n"; + + # Output the target phrases for the source file + for $i (1 .. $idcount) { + my $name=$idnum[$i - 1]; # get the ID + my $dest = $dest{$name}; # get the destination phrase + + $dest =~ s:\"$:\\0\":; # insert a \0 before the second quote - undef %set; + if(!$dest) { + # this is just to be on the safe side + $dest = '"\0"'; } + printf CFILE (" %s\n", $dest); } -} -close(LANG); +# Output end of string chunk + print CFILE <<MOO +; +/* end of generated string list */ +MOO +; -for(@hfile) { - print HFILE " $_,\n"; + close(HFILE); + close(CFILE); +} # end of the c/h file generation +elsif($binary) { + # Creation of a binary lang file was requested + + # We must first scan the english file to get the correct order of the id + # numbers used there, as that is what sets the id order for all language + # files. The english file is scanned before the translated file was + # scanned. + + open(OUTF, ">$binary") or die "Can't create $binary"; + binmode OUTF; + printf OUTF ("\x1a%c", $langversion); # magic lang file header + + # loop over the target phrases + for $i (1 .. $idcount) { + my $name=$idnum[$i - 1]; # get the ID + my $dest = $dest{$name}; # get the destination phrase + + if($dest) { + $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes + + # Now, make sure we get the number from the english sort order: + $idnum = $idmap{$name}; + + printf OUTF ("%c%c%s\x00", ($idnum>>8), ($idnum&0xff), $dest); + if($debug) { + printf("%02x => %s\n", $idnum, $value); + } + } + } } +elsif($voiceout) { + # voice output requested, display id: and voice: strings in a v1-like + # fashion -print HFILE <<MOO - LANG_LAST_INDEX_IN_ARRAY, /* this is not a string, this is a marker */ - /* --- below this follows voice-only strings --- */ - VOICEONLY_DELIMITER = 0x8000, -MOO - ; + my @engl; + + # This loops over the strings in the translated language file order + my @ids = ((0 .. ($idcount-1))); + push @ids, (0x8000 .. ($voiceid-1)); + + #for my $id (@ids) { + # print "$id\n"; + #} + + for $i (@ids) { + my $name=$idnum[$i]; # get the ID + my $dest = $voice{$name}; # get the destination voice string -for(@vfile) { - print HFILE " $_,\n"; + if($dest) { + $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes + + # Now, make sure we get the number from the english sort order: + $idnum = $idmap{$name}; + + $engl[$idnum] = $i; + + # print "Input index $i output index $idnum\n"; + + } + } + for my $i (@ids) { + + my $o = $engl[$i]; + + my $name=$idnum[$o]; # get the ID + my $dest = $voice{$name}; # get the destination voice string + + print "#$i\nid: $name\nvoice: $dest\n"; + } + } -print HFILE <<MOO -}; -/* end of generated enum list */ -MOO - ; -print CFILE <<MOO -; -/* end of generated string list */ -MOO - ; +if($verbose) { + printf("%d ID strings scanned\n", $idcount); -close(CFILE); -close(HFILE); + print "* head *\n"; + for(keys %head) { + printf "$_: %s\n", $head{$_}; + } +} -exit $errors; |