summaryrefslogtreecommitdiff
path: root/tools/genlang
diff options
context:
space:
mode:
authorDaniel Stenberg <daniel@haxx.se>2006-04-03 21:11:11 +0000
committerDaniel Stenberg <daniel@haxx.se>2006-04-03 21:11:11 +0000
commitc06e7772ff81ed4bbc78377a6e16456456f3e96c (patch)
tree0eee2026f47d5041461d2a35349f0c2175e97ab0 /tools/genlang
parenta87203651e35f368bf1d8bca5a846a0b9fb657c1 (diff)
downloadrockbox-c06e7772ff81ed4bbc78377a6e16456456f3e96c.zip
rockbox-c06e7772ff81ed4bbc78377a6e16456456f3e96c.tar.gz
rockbox-c06e7772ff81ed4bbc78377a6e16456456f3e96c.tar.bz2
rockbox-c06e7772ff81ed4bbc78377a6e16456456f3e96c.tar.xz
langv2
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@9470 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'tools/genlang')
-rwxr-xr-xtools/genlang611
1 files changed, 535 insertions, 76 deletions
diff --git a/tools/genlang b/tools/genlang
index cde23f8..07c866a 100755
--- a/tools/genlang
+++ b/tools/genlang
@@ -1,28 +1,430 @@
#!/usr/bin/perl -s
+# __________ __ ___.
+# Open \______ \ ____ ____ | | _\_ |__ _______ ___
+# Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
+# Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
+# Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
+# \/ \/ \/ \/ \/
+# $Id$
+#
+# Copyright (C) 2006 by Daniel Stenberg
+#
+
+# binary version for the binary lang file
+my $langversion = 2; # 2 is the latest one used in the v1 format
+
+# A note for future users and readers: The original v1 language system allowed
+# the build to create and use a different language than english built-in. We
+# removed that feature from our build-system, but the build scripts still had
+# the ability. But, starting now, this ability is no longer provided since I
+# figured it was boring and unnecessary to write support for now since we
+# don't use it anymore.
if(!$ARGV[0]) {
print <<MOO
-Usage: genlang [-p=<prefix>] <language file>
+Usage: genlang2 [options] <langv2 file>
+
+ -p=<prefix>
+ Make the tool create a [prefix].c and [prefix].h file.
+
+ -b=<outfile>
+ Make the tool create a binary language (.lng) file namaed [outfile].
+ The use of this option requires that you also use -e.
+
+ -u
+ Update language file. Given the translated file and the most recent english
+ file, you\'ll get an updated version sent to stdout. Suitable action to do
+ when you intend to update a translation.
+
+ -e=<english lang file>
+ Point out the english (original source) file, to use that as master
+ language template. Used in combination with -b or -u.
-When running this program. <prefix>.h and <prefix>.c will be created in the
-"current directory". <prefix> is "lang" by default.
+ -t=<target>
+ Specify which target you want the translations/phrases for. Required when
+ -b or -p is used.
+
+ -o
+ Voice mode output. Outputs all id: and voice: lines for the given target!
+
+ -v
+ Enables verbose (debug) output.
MOO
;
exit;
}
+# How update works:
+#
+# 1) scan the english file, keep the whole <phrase> for each phrase.
+# 2) read the translated file, for each end of phrase, compare:
+# A) all source strings, if there's any change there should be a comment about
+# it output
+# B) the desc fields
+#
+# 3) output the phrase with the comments from above
+# 4) check which phrases the translated version didn't have, and output
+# the english version of those
+#
+
my $prefix = $p;
-if(!$prefix) {
- $prefix="lang";
+my $binary = $b;
+my $update = $u;
+
+my $english = $e;
+my $voiceout = $o;
+
+my $check = ($binary?1:0) + ($prefix?1:0) + ($update?1:0) + ($voiceout?1:0);
+
+if($check > 1) {
+ print "Please use only one of -p, -u, -o and -b\n";
+ exit;
+}
+if(!$check) {
+ print "Please use at least one of -p, -u, -o and -b\n";
+ exit;
+}
+if(($binary || $update || $voiceout) && !$english) {
+ print "Please use -e too when you use -b, -o or -u\n";
+ exit;
+}
+
+my $target = $t;
+if(!$target && !$update) {
+ print "Please specify a target (with -t)!\n";
+ exit;
}
+my $verbose=$v;
+
+my %id; # string to num hash
+my @idnum; # num to string array
+
+my %source; # id string to source phrase hash
+my %dest; # id string to dest phrase hash
+my %voice; # id string to voice phrase hash
my $input = $ARGV[0];
-open(HFILE, ">$prefix.h");
-open(CFILE, ">$prefix.c");
+my @m;
+my $m="blank";
+
+# Glob-style match of a name against a pattern.
+#   $string  - the string to test (parsetarget passes the -t target name)
+#   $pattern - pattern in which '*' stands for any run of characters,
+#              including none, and '?' for exactly one character; all other
+#              characters are used as regular expression text as-is
+# Returns true only when the pattern covers the whole string.
+sub match {
+    my ($string, $pattern)=@_;
+
+    # Translate '?' before '*' so that nothing produced by the '*'
+    # translation can be rewritten by the '?' rule afterwards.
+    $pattern =~ s/\?/./g;
+    $pattern =~ s/\*/.*/g;
+
+    # Anchor both ends; an unanchored match would accept the target "h100"
+    # for the pattern "h10", or any target merely containing the pattern.
+    return ($string =~ /\A$pattern\z/);
+}
+
+# Tag handlers. For every "name: value" line the main loop calls the sub
+# whose name is held in $m, passing (whole line, name, value). $m starts
+# out as "blank" and is replaced by the tag name whenever a tag is opened.
+
+# Handler in effect while no tag is open: such lines are ignored.
+sub blank {
+    return;
+}
+
+# Name/value pairs seen while "header" is the current tag.
+my %head;
+sub header {
+    my (undef, $key, $value)=@_;
+    $head{$key}=$value;
+}
+
+# Name/value pairs seen while "phrase" is the current tag (id, desc, ...).
+my %phrase;
+sub phrase {
+    my (undef, $key, $value)=@_;
+    $phrase{$key}=$value;
+}
+
+# Shared worker for the source/dest/voice handlers.
+#   $debug  - label supplied by the caller ("src", "dest" or "voice"); not
+#             used by the active code
+#   $strref - reference to the scalar that receives the selected value
+#   $full   - the whole input line; not used here
+#   $n      - comma separated list of target patterns
+#   $v      - the value given for those targets
+# When one of the patterns matches $target and the value is true, the value
+# is stored through $strref. Returns the value on a match, undef otherwise.
+sub parsetarget {
+    my ($debug, $strref, $full, $n, $v)=@_;
+    my $hit;
+
+    foreach my $glob (split(" *, *", $n)) {
+        $hit = $v if(match($target, $glob));
+    }
+
+    $$strref = $hit if($hit);
+    return $hit;
+}
+
+# Handlers for lines inside the source, dest and voice tags. Each one hands
+# the line to parsetarget together with the scalar that holds the value
+# picked for the selected target until the phrase is closed.
+
+my $src;
+sub source {
+    my @line = @_;
+    return parsetarget("src", \$src, @line);
+}
+
+my $dest;
+sub dest {
+    my @line = @_;
+    return parsetarget("dest", \$dest, @line);
+}
+
+my $voice;
+sub voice {
+    my @line = @_;
+    return parsetarget("voice", \$voice, @line);
+}
+
+my %idmap;
+my %english;
+if($english) {
+ # For the cases where the english file needs to be scanned/read, we do
+ # it before we read the translated file. For -b it isn't necessary, but for
+ # -u it is convenient.
+
+ my $idnum=0; # start with a true number
+ my $vidnum=0x8000; # first voice id
+ open(ENG, "<$english") || die "can't open $english";
+ my @phrase;
+ my $id;
+ while(<ENG>) {
+
+ # get rid of DOS newlines
+ $_ =~ s/\r//g;
+
+ if($_ =~ /^ *\<phrase\>/) {
+ # this is the start of a phrase
+ }
+ elsif($_ =~ /^ *\<\/phrase\>/) {
+ # this is the end of a phrase, add it to the english hash
+ $english{$id}=join("", @phrase);
+ undef @phrase;
+ }
+ elsif($_ ne "\n") {
+ # gather everything related to this phrase
+ push @phrase, $_;
+ }
+
+ if($_ =~ /^ *id: ([^ \t\n]+)/i) {
+ $id=$1;
+ # voice-only entries get a different range
+ if($id =~ /^VOICE_/) {
+ # Assign an ID number to this entry
+ $idmap{$id}=$vidnum;
+ $vidnum++;
+ }
+ else {
+ # Assign an ID number to this entry
+ $idmap{$id}=$idnum;
+ $idnum++;
+ }
+ }
+ }
+ close(ENG);
+}
+
+# a function that compares the english phrase with the translated one.
+# compare source strings and desc
+
+# Then output the updated version!
+# Print one translated phrase to stdout, brought up to date against the
+# english version of the same phrase.
+#   $idstr  - the phrase ID string (not used in the body)
+#   $engref - reference to the list of english phrase lines; a newline is
+#             appended to each source line collected from it
+#   $locref - reference to the list of translated phrase lines; these are
+#             collected and printed as they are
+# A differing 'desc' field or <source> section is replaced by the english
+# one, with the previous text kept in "###" comment lines.
+sub compare {
+ my ($idstr, $engref, $locref)=@_;
+ my ($edesc, $ldesc);
+ my ($esource, $lsource);
+ my $mode=0;
+
+ # Pass 1: pick up the english desc and everything between <source> and
+ # </source>. $mode is set while inside the source section.
+ for my $l (@$engref) {
+ if($l =~ /^ *desc: (.*)/) {
+ $edesc=$1;
+ }
+ elsif($l =~ / *\<source\>/i) {
+ $mode=1;
+ }
+ elsif($mode) {
+ if($l =~ / *\<\/source\>/i) {
+ last;
+ }
+ $esource .= "$l\n";
+ }
+ }
+
+ # @show buffers lines that are waiting to be printed
+ my @show;
+ # NOTE(review): @source is never used in this sub
+ my @source;
+
+ # Pass 2: walk the translated phrase and output it
+ $mode = 0;
+ for my $l (@$locref) {
+ if($l =~ /^ *desc: (.*)/) {
+ $ldesc=$1;
+ if($edesc ne $ldesc) {
+ # replace the line with a note, the old desc and the english desc
+ $l = "### The 'desc' field differs from the english!\n### the previously used desc is commented below:\n### desc: $ldesc\n desc: $edesc\n";
+ }
+ push @show, $l;
+ }
+ elsif($l =~ / *\<source\>/i) {
+ $mode=1;
+ push @show, $l;
+ }
+ elsif($mode) {
+ if($l =~ / *\<\/source\>/i) {
+ # End of the source section: flush what is buffered so far, then
+ # print either the untouched translated source lines or the
+ # commented-out old ones followed by the english ones.
+ $mode = 0;
+ print @show;
+ if($esource ne $lsource) {
+ print "### The <source> section differs from the english!\n",
+ "### the previously used one is commented below:\n";
+ for(split("\n", $lsource)) {
+ print "### $_\n";
+ }
+ print $esource;
+ }
+ else {
+ print $lsource;
+ }
+ undef @show; # start over
+
+ # the </source> line itself opens the next buffered batch
+ push @show, $l;
+ }
+ else {
+ # a line inside the source section: collected, not buffered
+ $lsource .= "$l";
+ }
+ }
+ else {
+ push @show, $l;
+ }
+ }
+
+
+ # print whatever followed the source section
+ print @show;
+}
+
+my $idcount; # counter for lang ID numbers
+my $voiceid=0x8000; # counter for voice-only ID numbers
+
+#
+# Now start the scanning of the selected language string
+#
+
+open(LANG, "<$input");
+my @phrase;
+while(<LANG>) {
+
+ $line++;
+
+ # get rid of DOS newlines
+ $_ =~ s/\r//g;
+
+ if($_ =~ /^( *\#|[ \t\n\r]*\z)/) {
+ # comment or empty line
+ next;
+ }
+
+ my $ll = $_;
-print HFILE <<MOO
-/* This file was automatically generated using genlang */
+ # print "M: $m\n";
+
+ push @phrase, $ll;
+
+ # this is an XML-lookalike tag
+ if(/ *<([^>]*)>/) {
+ my $part = $1;
+ #print "P: $part\n";
+
+ if($part =~ /^\//) {
+ # this was a closing tag
+
+ if($part eq "/phrase") {
+ # closing the phrase
+
+ my $idstr = $phrase{'id'};
+ my $idnum;
+
+ if($dest =~ /^none\z/i) {
+ # "none" as dest means that this entire phrase is to be
+ # ignored
+ #print "dest is NONE!\n";
+ }
+ else {
+
+ # Use the ID name to figure out which id number range we
+ # should use for this phrase. Voice-only strings are
+ # separated.
+
+ if($idstr =~ /^VOICE/) {
+ $idnum = $voiceid++;
+ }
+ else {
+ $idnum = $idcount++;
+ }
+
+ $id{$idstr} = $idnum;
+ $idnum[$idnum]=$idstr;
+
+ $source{$idstr}=$src;
+ $dest{$idstr}=$dest;
+ $voice{$idstr}=$voice;
+
+ if($verbose) {
+ print "id: $phrase{id} ($idnum)\n";
+ print "source: $src\n";
+ print "dest: $dest\n";
+ print "voice: $voice\n";
+ }
+
+ undef $src;
+ undef $dest;
+ undef $voice;
+ undef %phrase;
+ }
+
+ if($update) {
+ my $e = $english{$idstr};
+
+ if($e) {
+ # compare original english with this!
+ my @eng = split("\n", $english{$idstr});
+
+ compare($idstr, \@eng, \@phrase);
+
+ $english{$idstr}=""; # clear it
+ }
+ else {
+ print "### $idstr: The phrase is not used. Skipped\n";
+ }
+ }
+ undef @phrase;
+
+ } # end of </phrase>
+
+ # starts with a slash, this _ends_ this section
+ $m = pop @m; # get back old value, the previous level's tag
+ next;
+ } # end of tag close
+
+ # This is an opening (sub) tag
+
+ push @m, $m; # store old value
+ $m = $1;
+ next;
+ }
+
+ if(/^ *([^:]+): *(.*)/) {
+ my ($name, $val)=($1, $2);
+ &$m($_, $name, $val);
+ }
+}
+close(LANG);
+
+if($update) {
+ my $any=0;
+ for(keys %english) {
+ if($english{$_}) {
+ print "###\n",
+ "### This phrase below was not present in the translated file\n",
+ "<phrase>\n";
+ print $english{$_};
+ print "</phrase>\n";
+ }
+ }
+}
+
+if($prefix) {
+ # We create a .c and .h file
+
+ open(HFILE, ">$prefix.h");
+ open(CFILE, ">$prefix.c");
+
+ print HFILE <<MOO
+/* This file was automatically generated using genlang2 */
/*
* The str() macro/functions is how to access strings that might be
* translated. Use it like str(MACRO) and expect a string to be
@@ -37,12 +439,12 @@ extern unsigned char *language_strings[];
extern const unsigned char language_builtin[];
/* The enum below contains all available strings */
-enum {
+enum \{
MOO
;
-print CFILE <<MOO
-/* This file was automaticly generated using genlang, the strings come
+ print CFILE <<MOO
+/* This file was automaticly generated using genlang2, the strings come
from "$input" */
#include "$prefix.h"
@@ -50,87 +452,144 @@ print CFILE <<MOO
unsigned char *language_strings[LANG_LAST_INDEX_IN_ARRAY];
const unsigned char language_builtin[] =
MOO
- ;
+;
-open(LANG, "<$input");
-while(<LANG>) {
- $line++;
- if($_ =~ / *\#/) {
- # comment
- next;
+ # Output the ID names for the enum in the header file
+ my $i;
+ for $i (1 .. $idcount) {
+ my $name=$idnum[$i - 1]; # get the ID name
+
+ $name =~ s/\"//g; # cut off the quotes
+
+ printf HFILE (" %s,\n", $name);
}
- # get rid of DOS newlines
- $_ =~ s/\r//g;
- if($_ =~ / *([a-z]+): *(.*)/) {
- ($var, $value) = ($1, $2);
- # print "$var => $value\n";
- $set{$var} = $value;
+# Output separation marker for last string ID and the upcoming voice IDs
- if( (($var eq "new") && $value && ($value !~ /^\"(.*)\"\W*$/)) ||
- (($var eq "voice") && $value && ($value !~ /^\"(.*)\"\W*$/)) ||
- (($var eq "eng") && ($value !~ /^\"(.*)\"\W*$/)) ) {
- print "$input:$line:missing quotes for ".$set{'id'}."\n";
- $errors++;
- next;
- }
+ print HFILE <<MOO
+ LANG_LAST_INDEX_IN_ARRAY, /* this is not a string, this is a marker */
+ /* --- below this follows voice-only strings --- */
+ VOICEONLY_DELIMITER = 0x8000,
+MOO
+ ;
- if($var eq "new") {
- # the last one for a single phrase
+# Output the ID names for the enum in the header file
+ my $i;
+ for $i (0x8000 .. ($voiceid-1)) {
+ my $name=$idnum[$i]; # get the ID name
+
+ $name =~ s/\"//g; # cut off the quotes
+
+ printf HFILE (" %s,\n", $name);
+ }
- if(!$value || ($value eq "\"\"") ) {
- # if not set, get the english version
- $value = $set{'eng'};
- }
-# print "VOICE: ".$set{'voice'}." VALUE: $value\n";
- # Note: if both entries are "", the string is deprecated,
- # but must be included to maintain compatibility
- if($set{'id'} =~ /^VOICE_/) {
- # voice-only
- push @vfile, $set{'id'};
- }
- else {
- push @hfile, $set{'id'};
- $value =~ s/^\"(.*)\"\W*$/\"$1\\0\"/;
- print CFILE " $value\n";
- }
+ # Output end of enum
+ print HFILE "\n};\n/* end of generated enum list */\n";
+
+ # Output the target phrases for the source file
+ for $i (1 .. $idcount) {
+ my $name=$idnum[$i - 1]; # get the ID
+ my $dest = $dest{$name}; # get the destination phrase
+
+ $dest =~ s:\"$:\\0\":; # insert a \0 before the second quote
- undef %set;
+ if(!$dest) {
+ # this is just to be on the safe side
+ $dest = '"\0"';
}
+ printf CFILE (" %s\n", $dest);
}
-}
-close(LANG);
+# Output end of string chunk
+ print CFILE <<MOO
+;
+/* end of generated string list */
+MOO
+;
-for(@hfile) {
- print HFILE " $_,\n";
+ close(HFILE);
+ close(CFILE);
+} # end of the c/h file generation
+elsif($binary) {
+    # Creation of a binary lang file was requested
+    #
+    # Layout written below: the byte 0x1a and the $langversion byte, then,
+    # for every phrase that has a dest string, the ID number as two bytes
+    # (high byte first) followed by the string and a terminating NUL.
+    #
+    # The ID numbers are taken from %idmap, which was filled in when the
+    # english file was scanned (before the translated file was read), as the
+    # english file is what sets the ID order for all language files.
+
+    open(OUTF, ">", $binary) or die "Can't create $binary";
+    binmode OUTF;
+    printf OUTF ("\x1a%c", $langversion); # magic lang file header
+
+    # loop over the target phrases, in the order they were read
+    for my $i (1 .. $idcount) {
+        my $name=$idnum[$i - 1]; # get the ID
+        my $dest = $dest{$name}; # get the destination phrase
+
+        next if(!$dest);
+
+        $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes
+
+        # Now, make sure we get the number from the english sort order:
+        my $num = $idmap{$name};
+        if(!defined($num)) {
+            # Not present in the english file, so there is no ID number to
+            # store it under; writing it anyway would file it under ID 0.
+            print STDERR "$name is not in $english, skipped\n";
+            next;
+        }
+
+        printf OUTF ("%c%c%s\x00", ($num>>8), ($num&0xff), $dest);
+        if($debug) {
+            # $debug is not among the documented options; with perl -s it
+            # is only set when -debug is given on the command line
+            printf("%02x => %s\n", $num, $dest);
+        }
+    }
+
+    # buffered write errors only show up when the file is closed
+    close(OUTF) or die "Can't write $binary";
+}
+elsif($voiceout) {
+ # voice output requested, display id: and voice: strings in a v1-like
+ # fashion
-print HFILE <<MOO
- LANG_LAST_INDEX_IN_ARRAY, /* this is not a string, this is a marker */
- /* --- below this follows voice-only strings --- */
- VOICEONLY_DELIMITER = 0x8000,
-MOO
- ;
+ my @engl;
+
+ # This loops over the strings in the translated language file order
+ my @ids = ((0 .. ($idcount-1)));
+ push @ids, (0x8000 .. ($voiceid-1));
+
+ #for my $id (@ids) {
+ # print "$id\n";
+ #}
+
+ for $i (@ids) {
+ my $name=$idnum[$i]; # get the ID
+ my $dest = $voice{$name}; # get the destination voice string
-for(@vfile) {
- print HFILE " $_,\n";
+ if($dest) {
+ $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes
+
+ # Now, make sure we get the number from the english sort order:
+ $idnum = $idmap{$name};
+
+ $engl[$idnum] = $i;
+
+ # print "Input index $i output index $idnum\n";
+
+ }
+ }
+ for my $i (@ids) {
+
+ my $o = $engl[$i];
+
+ my $name=$idnum[$o]; # get the ID
+ my $dest = $voice{$name}; # get the destination voice string
+
+ print "#$i\nid: $name\nvoice: $dest\n";
+ }
+
}
-print HFILE <<MOO
-};
-/* end of generated enum list */
-MOO
- ;
-print CFILE <<MOO
-;
-/* end of generated string list */
-MOO
- ;
+if($verbose) {
+ printf("%d ID strings scanned\n", $idcount);
-close(CFILE);
-close(HFILE);
+ print "* head *\n";
+ for(keys %head) {
+ printf "$_: %s\n", $head{$_};
+ }
+}
-exit $errors;