summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xtools/sapi5_voice_new.vbs69
-rwxr-xr-xtools/voice.pl97
2 files changed, 110 insertions, 56 deletions
diff --git a/tools/sapi5_voice_new.vbs b/tools/sapi5_voice_new.vbs
index 96c6e2a..f6abcf7 100755
--- a/tools/sapi5_voice_new.vbs
+++ b/tools/sapi5_voice_new.vbs
@@ -20,11 +20,13 @@
'To be done:
' - Allow user to override voice, speed and/or format (currently uses Control Panel defaults for voice/speed)
-' - Voice specific replacements/corrections for pronounciation (this should be at a higher level really)
+
+Option Explicit
Const SSFMCreateForWrite = 3
-Const SPSF_8kHz16BitMono = 6
+' Audio formats for SAPI5 filestream object
+Const SPSF_8kHz16BitMono = 6
Const SPSF_11kHz16BitMono = 10
Const SPSF_12kHz16BitMono = 14
Const SPSF_16kHz16BitMono = 18
@@ -34,34 +36,59 @@ Const SPSF_32kHz16BitMono = 30
Const SPSF_44kHz16BitMono = 34
Const SPSF_48kHz16BitMono = 38
-Dim oSpVoice, oSpFS, nAudioFormat, sText, sOutputFile
+Dim oShell, oEnv
+Dim oSpVoice, oSpFS ' SAPI5 voice and filestream
+Dim aLine, aData ' used in command reading
+Dim nAudioFormat
+Dim bVerbose
+
+
+On Error Resume Next
nAudioFormat = SPSF_22kHz16BitMono 'Audio format to use, recommended settings:
'- for AT&T natural voices, use SPSF_32kHz16BitMono
'- for MS voices, use SPSF_22kHz16BitMono
+Set oShell = CreateObject("WScript.Shell")
+Set oEnv = oShell.Environment("Process")
+bVerbose = (oEnv("V") <> "")
+
Set oSpVoice = CreateObject("SAPI.SpVoice")
If Err.Number <> 0 Then
- WScript.Echo "Error - could not get SpVoice object. " & _
- "SAPI 5 not installed?"
+ WScript.StdErr.WriteLine "Error - could not get SpVoice object. " & _
+ "SAPI 5 not installed?"
Err.Clear
WScript.Quit 1
End If
-While 1 > 0
- sText = WScript.StdIn.ReadLine
- sOutputFile = WScript.StdIn.ReadLine
- If sOutputFile = "" Then
- Set oSpFS = Nothing
- Set oSpVoice = Nothing
- Set oArgs = Nothing
- WScript.Quit 0
+Set oSpFS = CreateObject("SAPI.SpFileStream")
+oSpFS.Format.Type = nAudioFormat
+
+On Error Goto 0
+
+Do
+ aLine = Split(WScript.StdIn.ReadLine, vbTab, 2)
+ If Err.Number <> 0 Then
+ WScript.StdErr.WriteLine "Error " & Err.Number & ": " & Err.Description
+ WScript.Quit 1
End If
- ' WScript.Echo "Saying " + sText + " in " + sOutputFile
- Set oSpFS = CreateObject("SAPI.SpFileStream")
- oSpFS.Format.Type = nAudioFormat
- oSpFS.Open sOutputFile, SSFMCreateForWrite, False
- Set oSpVoice.AudioOutputStream = oSpFS
- oSpVoice.Speak sText
- oSpFS.Close
-Wend
+ Select Case aLine(0) ' command
+ Case "SPEAK"
+ aData = Split(aLine(1), vbTab, 2)
+ If bVerbose Then WScript.StdErr.WriteLine "Saying " & aData(1) _
+ & " in " & aData(0)
+ oSpFS.Open aData(0), SSFMCreateForWrite, false
+ Set oSpVoice.AudioOutputStream = oSpFS
+ oSpVoice.Speak aData(1)
+ oSpFS.Close
+ Case "EXEC"
+ If bVerbose Then WScript.StdErr.WriteLine "> " & aLine(1)
+ oShell.Run aLine(1), 0, true
+ Case "SYNC"
+ If bVerbose Then WScript.StdErr.WriteLine "Syncing"
+ WScript.StdOut.WriteLine aLine(1) ' Just echo what was passed
+ Case "QUIT"
+ If bVerbose Then WScript.StdErr.WriteLine "Quitting"
+ WScript.Quit 0
+ End Select
+Loop
diff --git a/tools/voice.pl b/tools/voice.pl
index 109451f..88f3ba4 100755
--- a/tools/voice.pl
+++ b/tools/voice.pl
@@ -21,6 +21,7 @@ use File::Basename;
use File::Copy;
use Switch;
use vars qw($V $C $t $l $e $E $s $S $i $v);
+use IPC::Open2;
use IPC::Open3;
use Digest::MD5 qw(md5_hex);
@@ -69,43 +70,44 @@ USAGE
sub init_tts {
our $verbose;
my ($tts_engine, $tts_engine_opts, $language) = @_;
- my $ret = undef;
+ my %ret = ("name" => $tts_engine);
switch($tts_engine) {
case "festival" {
print("> festival $tts_engine_opts --server\n") if $verbose;
my $pid = open(FESTIVAL_SERVER, "| festival $tts_engine_opts --server > /dev/null 2>&1");
- $ret = *FESTIVAL_SERVER;
- $ret = $pid;
+ my $dummy = *FESTIVAL_SERVER; #suppress warning
$SIG{INT} = sub { kill TERM => $pid; print("foo"); panic_cleanup(); };
$SIG{KILL} = sub { kill TERM => $pid; print("boo"); panic_cleanup(); };
+ $ret{"pid"} = $pid;
}
case "sapi5" {
my $toolsdir = dirname($0);
my $path = `cygpath $toolsdir -a -w`;
chomp($path);
- $path = $path . "\\sapi5_voice_new.vbs $language $tts_engine_opts";
- $path =~ s/\\/\\\\/g;
- print("> cscript /B $path\n") if $verbose;
- my $pid = open(F, "| cscript /B $path");
- $ret = *F;
- $SIG{INT} = sub { print($ret "\r\n\r\n"); panic_cleanup(); };
- $SIG{KILL} = sub { print($ret "\r\n\r\n"); panic_cleanup(); };
+ $path = $path . '\\';
+ my $cmd = $path . "sapi5_voice_new.vbs $language $tts_engine_opts";
+ $cmd =~ s/\\/\\\\/g;
+ print("> cscript //nologo $cmd\n") if $verbose;
+ my $pid = open2(*CMD_OUT, *CMD_IN, "cscript //nologo $cmd");
+ $SIG{INT} = sub { print(CMD_IN "QUIT\r\n"); panic_cleanup(); };
+ $SIG{KILL} = sub { print(CMD_IN "QUIT\r\n"); panic_cleanup(); };
+ %ret = (%ret, "stdin" => *CMD_IN, "stdout" => *CMD_OUT, "toolspath" => $path);
}
}
- return $ret;
+ return \%ret;
}
# Shutdown TTS engine if necessary.
sub shutdown_tts {
- my ($tts_engine, $tts_object) = @_;
- switch($tts_engine) {
+ my ($tts_object) = @_;
+ switch($$tts_object{"name"}) {
case "festival" {
# Send SIGTERM to festival server
- kill TERM => $tts_object;
+ kill TERM => $$tts_object{"pid"};
}
case "sapi5" {
- print($tts_object "\r\n\r\n");
- close($tts_object);
+ print({$$tts_object{"stdin"}} "QUIT\r\n");
+ close($$tts_object{"stdin"});
}
}
}
@@ -113,14 +115,14 @@ sub shutdown_tts {
# Apply corrections to a voice-string to make it sound better
sub correct_string {
our $verbose;
- my ($string, $language, $tts_engine) = @_;
+ my ($string, $language, $tts_object) = @_;
my $orig = $string;
switch($language) {
# General for all engines and languages (perhaps - just an example)
$string =~ s/USB/U S B/;
case ("deutsch") {
- switch($tts_engine) {
+ switch($$tts_object{"name"}) {
$string =~ s/alphabet/alfabet/;
$string =~ s/alkaline/alkalein/;
$string =~ s/ampere/amper/;
@@ -146,10 +148,10 @@ sub correct_string {
# Produce a wav file of the text given
sub voicestring {
our $verbose;
- my ($string, $output, $tts_engine, $tts_engine_opts, $tts_object) = @_;
+ my ($string, $output, $tts_engine_opts, $tts_object) = @_;
my $cmd;
- printf("Generate \"%s\" with %s in file %s\n", $string, $tts_engine, $output) if $verbose;
- switch($tts_engine) {
+ printf("Generate \"%s\" with %s in file %s\n", $string, $$tts_object{"name"}, $output) if $verbose;
+ switch($$tts_object{"name"}) {
case "festival" {
# festival_client lies to us, so we have to do awful soul-eating
# work with IPC::open3()
@@ -180,15 +182,31 @@ sub voicestring {
close(ESPEAK);
}
case "sapi5" {
- print($tts_object sprintf("%s\r\n%s\r\n", $string, $output));
+ print({$$tts_object{"stdin"}} sprintf("SPEAK\t%s\t%s\r\n", $output, $string));
}
}
}
+# trim leading / trailing silence from the clip
+sub wavtrim {
+ our $verbose;
+ my ($file, $threshold, $tts_object) = @_;
+ printf("Trim \"%s\"\n", $file) if $verbose;
+ if ($$tts_object{"name"} eq "sapi5") {
+ my $cmd = $$tts_object{"toolspath"}."wavtrim $file $threshold";
+ print({$$tts_object{"stdin"}} sprintf("EXEC\t%s\r\n", $cmd));
+ }
+ else {
+ my $cmd = dirname($0) . "/wavtrim $file $threshold";
+ print("> $cmd\n") if $verbose;
+ `$cmd`;
+ }
+}
+
# Encode a wav file into the given destination file
sub encodewav {
our $verbose;
- my ($input, $output, $encoder, $encoder_opts) = @_;
+ my ($input, $output, $encoder, $encoder_opts, $tts_object) = @_;
my $cmd = '';
printf("Encode \"%s\" with %s in file %s\n", $input, $encoder, $output) if $verbose;
switch ($encoder) {
@@ -202,16 +220,23 @@ sub encodewav {
$cmd = "speexenc $encoder_opts \"$input\" \"$output\"";
}
}
- print("> $cmd\n") if $verbose;
- `$cmd`;
+ if ($$tts_object{"name"} eq "sapi5") {
+ print({$$tts_object{"stdin"}} sprintf("EXEC\t%s\r\n", $cmd));
+ }
+ else {
+ print("> $cmd\n") if $verbose;
+ `$cmd`;
+ }
}
-sub wavtrim {
- our $verbose;
- my ($file) = @_;
- my $cmd = dirname($0) . "/wavtrim \"$file\"";
- print("> $cmd\n") if $verbose;
- `$cmd`;
+# synchronize the clip generation / processing if it's running in another process
+sub synchronize {
+ my ($tts_object) = @_;
+ if ($$tts_object{"name"} eq "sapi5") {
+ print({$$tts_object{"stdin"}} "SYNC\t42\r\n");
+ my $wait = readline($$tts_object{"stdout"});
+ #ignore what's actually returned
+ }
}
# Run genlang and create voice clips for each string
@@ -267,11 +292,13 @@ sub generateclips {
copy(dirname($0)."/VOICE_PAUSE.wav", $wav);
}
else {
- voicestring($voice, $wav, $tts_engine, $tts_engine_opts, $tts_object);
- wavtrim($wav, 500); # 500 seems to be a reasonable default for now
+ voicestring($voice, $wav, $tts_engine_opts, $tts_object);
+ wavtrim($wav, 500, $tts_object);
+ # 500 seems to be a reasonable default for now
}
- encodewav($wav, $mp3, $encoder, $encoder_opts);
+ encodewav($wav, $mp3, $encoder, $encoder_opts, $tts_object);
+ synchronize($tts_object);
if (defined($ENV{'POOL'})) {
copy($mp3, $pool_file);
}
@@ -284,7 +311,7 @@ sub generateclips {
}
print("\n");
close(VOICEFONTIDS);
- shutdown_tts($tts_engine, $tts_object);
+ shutdown_tts($tts_object);
}
# Assemble the voicefile