2 files changed, 110 insertions, 56 deletions
diff --git a/tools/sapi5_voice_new.vbs b/tools/sapi5_voice_new.vbs
index 96c6e2a..f6abcf7 100755
--- a/tools/sapi5_voice_new.vbs
+++ b/tools/sapi5_voice_new.vbs
@@ -20,11 +20,13 @@
 
 'To be done:
 ' - Allow user to override voice, speed and/or format (currently uses Control Panel defaults for voice/speed)
-' - Voice specific replacements/corrections for pronounciation (this should be at a higher level really)
+
+Option Explicit
 
 Const SSFMCreateForWrite = 3
 
-Const SPSF_8kHz16BitMono = 6
+' Audio formats for SAPI5 filestream object
+Const SPSF_8kHz16BitMono  = 6
 Const SPSF_11kHz16BitMono = 10
 Const SPSF_12kHz16BitMono = 14
 Const SPSF_16kHz16BitMono = 18
@@ -34,34 +36,59 @@ Const SPSF_32kHz16BitMono = 30
 Const SPSF_44kHz16BitMono = 34
 Const SPSF_48kHz16BitMono = 38
 
-Dim oSpVoice, oSpFS, nAudioFormat, sText, sOutputFile
+Dim oShell, oEnv
+Dim oSpVoice, oSpFS ' SAPI5 voice and filestream
+Dim aLine, aData    ' used in command reading
+Dim nAudioFormat   
+Dim bVerbose
+
+
+On Error Resume Next
 
 nAudioFormat = SPSF_22kHz16BitMono 'Audio format to use, recommended settings:
 '- for AT&T natural voices, use SPSF_32kHz16BitMono
 '- for MS voices, use SPSF_22kHz16BitMono
 
+Set oShell = CreateObject("WScript.Shell")
+Set oEnv = oShell.Environment("Process")
+bVerbose = (oEnv("V") <> "")
+
 Set oSpVoice = CreateObject("SAPI.SpVoice")
 If Err.Number <> 0 Then
-    WScript.Echo "Error - could not get SpVoice object. " & _
-    "SAPI 5 not installed?"
+    WScript.StdErr.WriteLine "Error - could not get SpVoice object. " & _
+                             "SAPI 5 not installed?"
     Err.Clear
     WScript.Quit 1
 End If
 
-While 1 > 0
-    sText = WScript.StdIn.ReadLine
-    sOutputFile = WScript.StdIn.ReadLine
-    If sOutputFile = "" Then
-        Set oSpFS = Nothing
-        Set oSpVoice = Nothing
-        Set oArgs = Nothing
-        WScript.Quit 0
+Set oSpFS = CreateObject("SAPI.SpFileStream")
+oSpFS.Format.Type = nAudioFormat
+
+On Error Goto 0
+
+Do
+    aLine = Split(WScript.StdIn.ReadLine, vbTab, 2)
+    If Err.Number <> 0 Then
+        WScript.StdErr.WriteLine "Error " & Err.Number & ": " & Err.Description
+        WScript.Quit 1
     End If
-    ' WScript.Echo "Saying " + sText + " in " + sOutputFile
-    Set oSpFS = CreateObject("SAPI.SpFileStream")
-    oSpFS.Format.Type = nAudioFormat
-    oSpFS.Open sOutputFile, SSFMCreateForWrite, False
-    Set oSpVoice.AudioOutputStream = oSpFS
-    oSpVoice.Speak sText
-    oSpFS.Close
-Wend
+    Select Case aLine(0) ' command
+        Case "SPEAK"
+            aData = Split(aLine(1), vbTab, 2)
+            If bVerbose Then WScript.StdErr.WriteLine "Saying " & aData(1) _
+                                                      & " in " & aData(0)
+            oSpFS.Open aData(0), SSFMCreateForWrite, false
+            Set oSpVoice.AudioOutputStream = oSpFS
+            oSpVoice.Speak aData(1)
+            oSpFS.Close
+        Case "EXEC"
+            If bVerbose Then WScript.StdErr.WriteLine "> " & aLine(1)
+            oShell.Run aLine(1), 0, true
+        Case "SYNC"
+            If bVerbose Then WScript.StdErr.WriteLine "Syncing"
+            WScript.StdOut.WriteLine aLine(1) ' Just echo what was passed
+        Case "QUIT"
+            If bVerbose Then WScript.StdErr.WriteLine "Quitting"
+            WScript.Quit 0
+    End Select
+Loop
diff --git a/tools/voice.pl b/tools/voice.pl
index 109451f..88f3ba4 100755
--- a/tools/voice.pl
+++ b/tools/voice.pl
@@ -21,6 +21,7 @@ use File::Basename;
 use File::Copy;
 use Switch;
 use vars qw($V $C $t $l $e $E $s $S $i $v);
+use IPC::Open2;
 use IPC::Open3;
 use Digest::MD5 qw(md5_hex);
 
@@ -69,43 +70,44 @@ USAGE
 sub init_tts {
     our $verbose;
     my ($tts_engine, $tts_engine_opts, $language) = @_;
-    my $ret = undef;
+    my %ret = ("name" => $tts_engine);
     switch($tts_engine) {
         case "festival" {
             print("> festival $tts_engine_opts --server\n") if $verbose;
             my $pid = open(FESTIVAL_SERVER, "| festival $tts_engine_opts --server > /dev/null 2>&1");
-            $ret = *FESTIVAL_SERVER;
-            $ret = $pid;
+            my $dummy = *FESTIVAL_SERVER; #suppress warning
             $SIG{INT} = sub { kill TERM => $pid; print("foo"); panic_cleanup(); };
             $SIG{KILL} = sub { kill TERM => $pid; print("boo"); panic_cleanup(); };
+            $ret{"pid"} = $pid;
         }
         case "sapi5" {
             my $toolsdir = dirname($0);
             my $path = `cygpath $toolsdir -a -w`;
             chomp($path);
-            $path = $path . "\\sapi5_voice_new.vbs $language $tts_engine_opts";
-            $path =~ s/\\/\\\\/g;
-            print("> cscript /B $path\n") if $verbose;
-            my $pid = open(F, "| cscript /B $path");
-            $ret = *F;
-            $SIG{INT} = sub { print($ret "\r\n\r\n"); panic_cleanup(); };
-            $SIG{KILL} = sub { print($ret "\r\n\r\n"); panic_cleanup(); };
+            $path = $path . '\\';
+            my $cmd = $path . "sapi5_voice_new.vbs $language $tts_engine_opts";
+            $cmd =~ s/\\/\\\\/g;
+            print("> cscript //nologo $cmd\n") if $verbose;
+            my $pid = open2(*CMD_OUT, *CMD_IN, "cscript //nologo $cmd");
+            $SIG{INT} = sub { print(CMD_IN "QUIT\r\n"); panic_cleanup(); };
+            $SIG{KILL} = sub { print(CMD_IN "QUIT\r\n"); panic_cleanup(); };
+            %ret = (%ret, "stdin" => *CMD_IN, "stdout" => *CMD_OUT, "toolspath" => $path);
         }
     }
-    return $ret;
+    return \%ret;
 }
 
 # Shutdown TTS engine if necessary.
 sub shutdown_tts {
-    my ($tts_engine, $tts_object) = @_;
-    switch($tts_engine) {
+    my ($tts_object) = @_;
+    switch($$tts_object{"name"}) {
         case "festival" {
             # Send SIGTERM to festival server
-            kill TERM => $tts_object;
+            kill TERM => $$tts_object{"pid"};
         }
         case "sapi5" {
-            print($tts_object "\r\n\r\n");
-            close($tts_object);
+            print({$$tts_object{"stdin"}} "QUIT\r\n");
+            close($$tts_object{"stdin"});
         }
     }
 }
@@ -113,14 +115,14 @@ sub shutdown_tts {
 # Apply corrections to a voice-string to make it sound better
 sub correct_string {
     our $verbose;
-    my ($string, $language, $tts_engine) = @_;
+    my ($string, $language, $tts_object) = @_;
     my $orig = $string;
     switch($language) {
         # General for all engines and languages (perhaps - just an example)
         $string =~ s/USB/U S B/;
 
         case ("deutsch") {
-            switch($tts_engine) {
+            switch($$tts_object{"name"}) {
                 $string =~ s/alphabet/alfabet/;
                 $string =~ s/alkaline/alkalein/;
                 $string =~ s/ampere/amper/;
@@ -146,10 +148,10 @@ sub correct_string {
 # Produce a wav file of the text given
 sub voicestring {
     our $verbose;
-    my ($string, $output, $tts_engine, $tts_engine_opts, $tts_object) = @_;
+    my ($string, $output, $tts_engine_opts, $tts_object) = @_;
     my $cmd;
-    printf("Generate \"%s\" with %s in file %s\n", $string, $tts_engine, $output) if $verbose;
-    switch($tts_engine) {
+    printf("Generate \"%s\" with %s in file %s\n", $string, $$tts_object{"name"}, $output) if $verbose;
+    switch($$tts_object{"name"}) {
         case "festival" {
             # festival_client lies to us, so we have to do awful soul-eating
             # work with IPC::open3()
@@ -180,15 +182,31 @@ sub voicestring {
             close(ESPEAK);
         }
         case "sapi5" {
-            print($tts_object sprintf("%s\r\n%s\r\n", $string, $output));
+            print({$$tts_object{"stdin"}} sprintf("SPEAK\t%s\t%s\r\n", $output, $string));
         }
     }
 }
 
+# trim leading / trailing silence from the clip
+sub wavtrim {
+    our $verbose;
+    my ($file, $threshold, $tts_object) = @_;
+    printf("Trim \"%s\"\n", $file) if $verbose;
+    if ($$tts_object{"name"} eq "sapi5") {
+        my $cmd = $$tts_object{"toolspath"}."wavtrim $file $threshold";
+        print({$$tts_object{"stdin"}} sprintf("EXEC\t%s\r\n", $cmd));
+    }
+    else {
+        my $cmd = dirname($0) . "/wavtrim $file $threshold";
+        print("> $cmd\n") if $verbose;
+        `$cmd`;
+    }
+}
+
 # Encode a wav file into the given destination file
 sub encodewav {
     our $verbose;
-    my ($input, $output, $encoder, $encoder_opts) = @_;
+    my ($input, $output, $encoder, $encoder_opts, $tts_object) = @_;
     my $cmd = '';
     printf("Encode \"%s\" with %s in file %s\n", $input, $encoder, $output) if $verbose;
     switch ($encoder) {
@@ -202,16 +220,23 @@ sub encodewav {
             $cmd = "speexenc $encoder_opts \"$input\" \"$output\"";
         }
     }
-    print("> $cmd\n") if $verbose;
-    `$cmd`;
+    if ($$tts_object{"name"} eq "sapi5") {
+        print({$$tts_object{"stdin"}} sprintf("EXEC\t%s\r\n", $cmd));
+    }
+    else {
+        print("> $cmd\n") if $verbose;
+        `$cmd`;
+    }
 }
 
-sub wavtrim {
-    our $verbose;
-    my ($file) = @_;
-    my $cmd = dirname($0) . "/wavtrim \"$file\"";
-    print("> $cmd\n") if $verbose;
-    `$cmd`;
+# synchronize the clip generation / processing if it's running in another process
+sub synchronize {
+    my ($tts_object) = @_;
+    if ($$tts_object{"name"} eq "sapi5") {
+        print({$$tts_object{"stdin"}} "SYNC\t42\r\n");
+        my $wait = readline($$tts_object{"stdout"});
+        #ignore what's actually returned
+    }
 }
 
 # Run genlang and create voice clips for each string
@@ -267,11 +292,13 @@ sub generateclips {
                         copy(dirname($0)."/VOICE_PAUSE.wav", $wav);
                     }
                     else {
-                        voicestring($voice, $wav, $tts_engine, $tts_engine_opts, $tts_object);
-                        wavtrim($wav, 500); # 500 seems to be a reasonable default for now
+                        voicestring($voice, $wav, $tts_engine_opts, $tts_object);
+                        wavtrim($wav, 500, $tts_object);
+                        # 500 seems to be a reasonable default for now
                     }
 
-                    encodewav($wav, $mp3, $encoder, $encoder_opts);
+                    encodewav($wav, $mp3, $encoder, $encoder_opts, $tts_object);
+                    synchronize($tts_object);
                     if (defined($ENV{'POOL'})) {
                         copy($mp3, $pool_file);
                     }
@@ -284,7 +311,7 @@ sub generateclips {
     }
     print("\n");
     close(VOICEFONTIDS);
-    shutdown_tts($tts_engine, $tts_object);
+    shutdown_tts($tts_object);
 }
 
 # Assemble the voicefile