add make TLG binary & script

pull/1599/head
Yang Zhou 2 years ago
parent ad8ec177ef
commit 15f434a5d3

@ -0,0 +1,59 @@
#!/bin/bash
# To be run from one directory above this script.
. ./path.sh
text=data/local/lm/text
lexicon=data/local/dict/lexicon.txt
for f in "$text" "$lexicon"; do
[ ! -f $f ] && echo "$0: No such file $f" && exit 1;
done
# Check SRILM tools
if ! which ngram-count > /dev/null; then
echo "srilm tools are not found, please download it and install it from: "
echo "http://www.speech.sri.com/projects/srilm/download.html"
echo "Then add the tools to your PATH"
exit 1
fi
# This script takes no arguments. It assumes you have already run
# aishell_data_prep.sh.
# It takes as input the files
# data/local/lm/text
# data/local/dict/lexicon.txt
dir=data/local/lm
mkdir -p $dir
cleantext=$dir/text.no_oov
cat $text | awk -v lex=$lexicon 'BEGIN{while((getline<lex) >0){ seen[$1]=1; } }
{for(n=1; n<=NF;n++) { if (seen[$n]) { printf("%s ", $n); } else {printf("<SPOKEN_NOISE> ");} } printf("\n");}' \
> $cleantext || exit 1;
cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | sort | uniq -c | \
sort -nr > $dir/word.counts || exit 1;
# Get counts from acoustic training transcripts, and add one-count
# for each word in the lexicon (but not silence, we don't want it
# in the LM-- we'll add it optionally later).
cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | \
cat - <(grep -w -v '!SIL' $lexicon | awk '{print $1}') | \
sort | uniq -c | sort -nr > $dir/unigram.counts || exit 1;
cat $dir/unigram.counts | awk '{print $2}' | cat - <(echo "<s>"; echo "</s>" ) > $dir/wordlist
heldout_sent=10000 # Don't change this if you want results to be comparable with
# the kaldi_lm results.
mkdir -p $dir
cat $cleantext | awk '{for(n=2;n<=NF;n++){ printf $n; if(n<NF) printf " "; else print ""; }}' | \
head -$heldout_sent > $dir/heldout
cat $cleantext | awk '{for(n=2;n<=NF;n++){ printf $n; if(n<NF) printf " "; else print ""; }}' | \
tail -n +$heldout_sent > $dir/train
ngram-count -text $dir/train -order 3 -limit-vocab -vocab $dir/wordlist -unk \
-map-unk "<UNK>" -kndiscount -interpolate -lm $dir/lm.arpa
ngram -lm $dir/lm.arpa -ppl $dir/heldout

@ -0,0 +1,31 @@
#!/bin/bash
. ./path.sh || exit 1;
. tools/parse_options.sh || exit 1;
data=/mnt/dataset/aishell
# Optionally, you can add LM and test it with runtime.
dir=./ds2_graph
dict=$dir/vocab.txt
if [ ${stage} -le 7 ] && [ ${stop_stage} -ge 7 ]; then
# 7.1 Prepare dict
unit_file=$dict
mkdir -p $dir/local/dict
cp $unit_file $dir/local/dict/units.txt
tools/fst/prepare_dict.py $unit_file ${data}/resource_aishell/lexicon.txt \
$dir/local/dict/lexicon.txt
# Train lm
lm=$dir/local/lm
mkdir -p $lm
tools/filter_scp.pl data/train/text \
$data/data_aishell/transcript/aishell_transcript_v0.8.txt > $lm/text
local/ds2_aishell_train_lms.sh
# Build decoding TLG
tools/fst/compile_lexicon_token_fst.sh \
$dir/local/dict $dir/local/tmp $dir/local/lang
tools/fst/make_tlg.sh $dir/local/lm $dir/local/lang $dir/lang_test || exit 1;
fi

@ -0,0 +1,14 @@
# This contains the locations of the binaries built for running the examples.
SPEECHX_ROOT=$PWD/../..
SPEECHX_EXAMPLES=$SPEECHX_ROOT/build/examples
SPEECHX_TOOLS=$SPEECHX_ROOT/tools
TOOLS_BIN=$SPEECHX_TOOLS/valgrind/install/bin
[ -d $SPEECHX_EXAMPLES ] || { echo "Error: 'build/examples' directory not found. Please ensure that the project has been built successfully"; }
export LC_ALL=C
SPEECHX_BIN=$SPEECHX_EXAMPLES/feat
export PATH=$PATH:$SPEECHX_BIN:$TOOLS_BIN

@ -0,0 +1,195 @@
#!/usr/bin/env perl
# Copyright 2010-2011 Microsoft Corporation
# 2013-2016 Johns Hopkins University (author: Daniel Povey)
# 2015 Hainan Xu
# 2015 Guoguo Chen
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Adds disambiguation symbols to a lexicon.
# Outputs still in the normal lexicon format.
# Disambig syms are numbered #1, #2, #3, etc. (#0
# is reserved for the backoff symbol in the grammar).
# Outputs the number of disambig syms to the standard output.
# With the --pron-probs option, expects the second field
# of each lexicon line to be a pron-prob.
# With the --sil-probs option, expects three additional
# fields after the pron-prob, representing various components
# of the silence probability model.
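# A minimal illustration (hypothetical entries, no pron-probs): if two words
# share the pronunciation "a b c", e.g.
#   foo a b c
#   bar a b c
# the output lexicon will contain
#   foo a b c #1
#   bar a b c #2
# and the script prints "2" (the largest disambig symbol used) on stdout.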
$pron_probs = 0;
$sil_probs = 0;
$first_allowed_disambig = 1;
for ($n = 1; $n <= 3 && @ARGV > 0; $n++) {
if ($ARGV[0] eq "--pron-probs") {
$pron_probs = 1;
shift @ARGV;
}
if ($ARGV[0] eq "--sil-probs") {
$sil_probs = 1;
shift @ARGV;
}
if ($ARGV[0] eq "--first-allowed-disambig") {
$first_allowed_disambig = 0 + $ARGV[1];
if ($first_allowed_disambig < 1) {
die "add_lex_disambig.pl: invalid --first-allowed-disambig option: $first_allowed_disambig\n";
}
shift @ARGV;
shift @ARGV;
}
}
if (@ARGV != 2) {
die "Usage: add_lex_disambig.pl [opts] <lexicon-in> <lexicon-out>\n" .
"This script adds disambiguation symbols to a lexicon in order to\n" .
"make decoding graphs determinizable; it adds pseudo-phone\n" .
"disambiguation symbols #1, #2 and so on at the ends of phones\n" .
"to ensure that all pronunciations are different, and that none\n" .
"is a prefix of another.\n" .
"It prints to the standard output the number of the largest-numbered" .
"disambiguation symbol that was used.\n" .
"\n" .
"Options: --pron-probs Expect pronunciation probabilities in the 2nd field\n" .
" --sil-probs [should be with --pron-probs option]\n" .
" Expect 3 extra fields after the pron-probs, for aspects of\n" .
" the silence probability model\n" .
" --first-allowed-disambig <n> The number of the first disambiguation symbol\n" .
" that this script is allowed to add. By default this is\n" .
" #1, but you can set this to a larger value using this option.\n" .
"e.g.:\n" .
" add_lex_disambig.pl lexicon.txt lexicon_disambig.txt\n" .
" add_lex_disambig.pl --pron-probs lexiconp.txt lexiconp_disambig.txt\n" .
" add_lex_disambig.pl --pron-probs --sil-probs lexiconp_silprob.txt lexiconp_silprob_disambig.txt\n";
}
$lexfn = shift @ARGV;
$lexoutfn = shift @ARGV;
open(L, "<$lexfn") || die "Error opening lexicon $lexfn";
# (1) Read in the lexicon.
@L = ( );
while(<L>) {
@A = split(" ", $_);
push @L, join(" ", @A);
}
# (2) Work out the count of each phone-sequence in the
# lexicon.
foreach $l (@L) {
@A = split(" ", $l);
shift @A; # Remove word.
if ($pron_probs) {
$p = shift @A;
if (!($p > 0.0 && $p <= 1.0)) { die "Bad lexicon line $l (expecting pron-prob as second field)"; }
}
if ($sil_probs) {
$silp = shift @A;
if (!($silp > 0.0 && $silp <= 1.0)) { die "Bad lexicon line $l for silprobs"; }
$correction = shift @A;
if ($correction <= 0.0) { die "Bad lexicon line $l for silprobs"; }
$correction = shift @A;
if ($correction <= 0.0) { die "Bad lexicon line $l for silprobs"; }
}
if (!(@A)) {
die "Bad lexicon line $1, no phone in phone list";
}
$count{join(" ",@A)}++;
}
# (3) For each left sub-sequence of each phone-sequence, note down
# that it exists (for identifying prefixes of longer strings).
foreach $l (@L) {
@A = split(" ", $l);
shift @A; # Remove word.
if ($pron_probs) { shift @A; } # remove pron-prob.
if ($sil_probs) {
shift @A; # Remove silprob
shift @A; # Remove silprob
}
while(@A > 0) {
pop @A; # Remove last phone
$issubseq{join(" ",@A)} = 1;
}
}
# (4) For each entry in the lexicon:
# if the phone sequence is unique and is not a
# prefix of another word, no disambig symbol.
# Else output #1, or #2, #3, ... if the same phone-seq
# has already been assigned a disambig symbol.
open(O, ">$lexoutfn") || die "Opening lexicon file $lexoutfn for writing.\n";
# max_disambig will always be the highest-numbered disambiguation symbol that
# has been used so far.
$max_disambig = $first_allowed_disambig - 1;
foreach $l (@L) {
@A = split(" ", $l);
$word = shift @A;
if ($pron_probs) {
$pron_prob = shift @A;
}
if ($sil_probs) {
$sil_word_prob = shift @A;
$word_sil_correction = shift @A;
$prev_nonsil_correction = shift @A
}
$phnseq = join(" ", @A);
if (!defined $issubseq{$phnseq}
&& $count{$phnseq} == 1) {
; # Do nothing.
} else {
if ($phnseq eq "") { # need disambig symbols for the empty string
# that are not used anywhere else.
$max_disambig++;
$reserved_for_the_empty_string{$max_disambig} = 1;
$phnseq = "#$max_disambig";
} else {
$cur_disambig = $last_used_disambig_symbol_of{$phnseq};
if (!defined $cur_disambig) {
$cur_disambig = $first_allowed_disambig;
} else {
$cur_disambig++; # Get a number that has not been used yet for
# this phone sequence.
}
while (defined $reserved_for_the_empty_string{$cur_disambig}) {
$cur_disambig++;
}
if ($cur_disambig > $max_disambig) {
$max_disambig = $cur_disambig;
}
$last_used_disambig_symbol_of{$phnseq} = $cur_disambig;
$phnseq = $phnseq . " #" . $cur_disambig;
}
}
if ($pron_probs) {
if ($sil_probs) {
print O "$word\t$pron_prob\t$sil_word_prob\t$word_sil_correction\t$prev_nonsil_correction\t$phnseq\n";
} else {
print O "$word\t$pron_prob\t$phnseq\n";
}
} else {
print O "$word\t$phnseq\n";
}
}
print $max_disambig . "\n";

@ -0,0 +1,86 @@
#!/bin/bash
# Copyright 2015 Yajie Miao (Carnegie Mellon University)
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# This script compiles the lexicon and CTC tokens into FSTs. FST compiling slightly differs between the
# phoneme and character-based lexicons.
set -eo pipefail
. tools/parse_options.sh
if [ $# -ne 3 ]; then
echo "usage: tools/fst/compile_lexicon_token_fst.sh <dict-src-dir> <tmp-dir> <lang-dir>"
echo "e.g.: tools/fst/compile_lexicon_token_fst.sh data/local/dict data/local/lang_tmp data/lang"
echo "<dict-src-dir> should contain the following files:"
echo "lexicon.txt lexicon_numbers.txt units.txt"
echo "options: "
exit 1;
fi
srcdir=$1
tmpdir=$2
dir=$3
mkdir -p $dir $tmpdir
[ -f path.sh ] && . ./path.sh
cp $srcdir/units.txt $dir
# Add probabilities to lexicon entries. There is in fact no point in doing this here, since all the entries have a probability of 1.0.
# But utils/make_lexicon_fst.pl requires a probabilistic version, so we just leave it as it is.
perl -ape 's/(\S+\s+)(.+)/${1}1.0\t$2/;' < $srcdir/lexicon.txt > $tmpdir/lexiconp.txt || exit 1;
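# For illustration (hypothetical entry): a lexicon line such as
#   你好 n i3 h ao3
# becomes
#   你好 1.0  n i3 h ao3
# in lexiconp.txt (the 1.0 is followed by a tab).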
# Add disambiguation symbols to the lexicon. This is necessary for determinizing the composition of L.fst and G.fst.
# Without these symbols, determinization will fail.
ndisambig=`tools/fst/add_lex_disambig.pl $tmpdir/lexiconp.txt $tmpdir/lexiconp_disambig.txt`
ndisambig=$[$ndisambig+1];
( for n in `seq 0 $ndisambig`; do echo '#'$n; done ) > $tmpdir/disambig.list
# Get the full list of CTC tokens used in FST. These tokens include <eps>, the blank <blk>,
# the actual model unit, and the disambiguation symbols.
cat $srcdir/units.txt | awk '{print $1}' > $tmpdir/units.list
(echo '<eps>';) | cat - $tmpdir/units.list $tmpdir/disambig.list | awk '{print $1 " " (NR-1)}' > $dir/tokens.txt
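# For reference, tokens.txt then starts with <eps> at index 0, followed by the
# entries of units.txt and finally the disambiguation symbols, e.g. (the actual
# units depend on your units.txt):
#   <eps> 0
#   <blank> 1
#   ... model units ...
#   #0 <first-free-index>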
# ctc_token_fst_corrected is too big and too slow for character-based Chinese modeling,
# so we just use the simple ctc_token_fst here.
tools/fst/ctc_token_fst.py $dir/tokens.txt | \
fstcompile --isymbols=$dir/tokens.txt --osymbols=$dir/tokens.txt --keep_isymbols=false --keep_osymbols=false | \
fstarcsort --sort_type=olabel > $dir/T.fst || exit 1;
# Encode the words with indices. Will be used in lexicon and language model FST compiling.
cat $tmpdir/lexiconp.txt | awk '{print $1}' | sort | awk '
BEGIN {
print "<eps> 0";
}
{
printf("%s %d\n", $1, NR);
}
END {
printf("#0 %d\n", NR+1);
printf("<s> %d\n", NR+2);
printf("</s> %d\n", NR+3);
}' > $dir/words.txt || exit 1;
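# The generated words.txt therefore has the layout (N = number of lexicon words):
#   <eps> 0
#   <word_1> 1
#   ...
#   <word_N> N
#   #0 N+1
#   <s> N+2
#   </s> N+3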
# Now compile the lexicon FST. Depending on the size of your lexicon, it may take some time.
token_disambig_symbol=`grep \#0 $dir/tokens.txt | awk '{print $2}'`
word_disambig_symbol=`grep \#0 $dir/words.txt | awk '{print $2}'`
tools/fst/make_lexicon_fst.pl --pron-probs $tmpdir/lexiconp_disambig.txt 0 "sil" '#'$ndisambig | \
fstcompile --isymbols=$dir/tokens.txt --osymbols=$dir/words.txt \
--keep_isymbols=false --keep_osymbols=false | \
fstaddselfloops "echo $token_disambig_symbol |" "echo $word_disambig_symbol |" | \
fstarcsort --sort_type=olabel > $dir/L.fst || exit 1;
echo "Lexicon and token FSTs compiling succeeded"

@ -0,0 +1,24 @@
#!/usr/bin/env python
import sys
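# Writes a CTC token FST in OpenFst text format to stdout, one arc per line
# ("src dst ilabel olabel"). By our reading of the arcs below: state 0 is the
# start/final state, state 1 absorbs leading <blank>s, state 2 absorbs trailing
# <blank>s, and each unit gets its own state so that repeated frames of that
# unit collapse to a single output symbol; disambiguation symbols are output on
# <eps>-input self-loops at state 0.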
print('0 1 <eps> <eps>')
print('1 1 <blank> <eps>')
print('2 2 <blank> <eps>')
print('2 0 <eps> <eps>')
with open(sys.argv[1], 'r', encoding='utf8') as fin:
    node = 3
    for entry in fin:
        fields = entry.strip().split(' ')
        phone = fields[0]
        if phone == '<eps>' or phone == '<blank>':
            continue
        elif '#' in phone:  # disambiguation symbol
            print('{} {} {} {}'.format(0, 0, '<eps>', phone))
        else:
            print('{} {} {} {}'.format(1, node, phone, phone))
            print('{} {} {} {}'.format(node, node, phone, '<eps>'))
            print('{} {} {} {}'.format(node, 2, '<eps>', '<eps>'))
            node += 1
print('0')

@ -0,0 +1,21 @@
#!/usr/bin/env python
import sys
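# Writes a more compact token FST than ctc_token_fst.py (same OpenFst text
# format): <blank> maps to <eps> on a self-loop at state 0, each unit gets one
# auxiliary state that swallows its repetitions before returning to state 0, and
# disambiguation symbols are output on <eps>-input self-loops at state 0, the
# only final state.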
print('0 0 <blank> <eps>')
with open(sys.argv[1], 'r', encoding='utf8') as fin:
    node = 1
    for entry in fin:
        fields = entry.strip().split(' ')
        phone = fields[0]
        if phone == '<eps>' or phone == '<blank>':
            continue
        elif '#' in phone:  # disambiguation symbol
            print('{} {} {} {}'.format(0, 0, '<eps>', phone))
        else:
            print('{} {} {} {}'.format(0, node, phone, phone))
            print('{} {} {} {}'.format(node, node, phone, '<eps>'))
            print('{} {} {} {}'.format(node, 0, '<eps>', '<eps>'))
            node += 1
print('0')

@ -0,0 +1,55 @@
#!/usr/bin/env python
import sys
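# Our reading of the helpers below (assuming tokens.txt numbers <eps> as 0 and
# <blank> as 1): il()/ol() map the n-th real unit to its input/output label id
# (offset by one, with il(0) standing for <blank>), and s() maps the n-th unit
# to the state dedicated to it.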
def il(n):
    return n + 1


def ol(n):
    return n + 1


def s(n):
    return n
if __name__ == "__main__":
    with open(sys.argv[1]) as f:
        lines = f.readlines()

    phone_count = 0
    disambig_count = 0
    for line in lines:
        sp = line.split()
        phone = sp[0]
        if phone == '<eps>' or phone == '<blank>':
            continue
        if phone.startswith('#'):
            disambig_count += 1
        else:
            phone_count += 1

    # 1. add start state
    print('0 0 {} 0'.format(il(0)))

    # 2. 0 -> i, i -> i, i -> 0
    for i in range(1, phone_count + 1):
        print('0 {} {} {}'.format(s(i), il(i), ol(i)))
        print('{} {} {} 0'.format(s(i), s(i), il(i)))
        print('{} 0 {} 0'.format(s(i), il(0)))

    # 3. i -> other phone
    for i in range(1, phone_count + 1):
        for j in range(1, phone_count + 1):
            if i != j:
                print('{} {} {} {}'.format(s(i), s(j), il(j), ol(j)))

    # 4. add disambiguous arcs on every final state
    for i in range(0, phone_count + 1):
        for j in range(phone_count + 2, phone_count + disambig_count + 2):
            print('{} {} {} {}'.format(s(i), s(i), 0, j))

    # 5. every i is final state
    for i in range(0, phone_count + 1):
        print(s(i))

@ -0,0 +1,29 @@
#!/usr/bin/env perl
# Copyright 2010-2011 Microsoft Corporation
# 2015 Guoguo Chen
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# This script replaces epsilon with #0 on the input side only of the G.fst
# acceptor.
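# For example, an arc printed from G.fst such as
#   0  1  <eps>  HELLO  2.5
# is rewritten as
#   0  1  #0  HELLO  2.5
# (HELLO and the weight are illustrative; only the input-side <eps> changes).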
while(<>){
if (/\s+#0\s+/) {
print STDERR "$0: ERROR: LM has word #0, " .
"which is reserved as disambiguation symbol\n";
exit 1;
}
s:^(\d+\s+\d+\s+)\<eps\>(\s+):$1#0$2:;
print;
}

@ -0,0 +1,155 @@
#!/usr/bin/env perl
use warnings; #sed replacement for -w perl parameter
# Copyright 2010-2011 Microsoft Corporation
# 2013 Johns Hopkins University (author: Daniel Povey)
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# makes lexicon FST, in text form, from lexicon (pronunciation probabilities optional).
$pron_probs = 0;
if ((@ARGV > 0) && ($ARGV[0] eq "--pron-probs")) {
$pron_probs = 1;
shift @ARGV;
}
if (@ARGV != 1 && @ARGV != 3 && @ARGV != 4) {
print STDERR "Usage: make_lexicon_fst.pl [--pron-probs] lexicon.txt [silprob silphone [sil_disambig_sym]] >lexiconfst.txt\n\n";
print STDERR "Creates a lexicon FST that transduces phones to words, and may allow optional silence.\n\n";
print STDERR "Note: ordinarily, each line of lexicon.txt is:\n";
print STDERR " word phone1 phone2 ... phoneN;\n";
print STDERR "if the --pron-probs option is used, each line is:\n";
print STDERR " word pronunciation-probability phone1 phone2 ... phoneN.\n\n";
print STDERR "The probability 'prob' will typically be between zero and one, and note that\n";
print STDERR "it's generally helpful to normalize so the largest one for each word is 1.0, but\n";
print STDERR "this is your responsibility.\n\n";
print STDERR "The silence disambiguation symbol, e.g. something like #5, is used only\n";
print STDERR "when creating a lexicon with disambiguation symbols, e.g. L_disambig.fst,\n";
print STDERR "and was introduced to fix a particular case of non-determinism of decoding graphs.\n\n";
exit(1);
}
$lexfn = shift @ARGV;
if (@ARGV == 0) {
$silprob = 0.0;
} elsif (@ARGV == 2) {
($silprob,$silphone) = @ARGV;
} else {
($silprob,$silphone,$sildisambig) = @ARGV;
}
if ($silprob != 0.0) {
$silprob < 1.0 || die "Sil prob cannot be >= 1.0";
$silcost = -log($silprob);
$nosilcost = -log(1.0 - $silprob);
}
open(L, "<$lexfn") || die "Error opening lexicon $lexfn";
if ( $silprob == 0.0 ) { # No optional silences: just have one (loop+final) state which is numbered zero.
$loopstate = 0;
$nextstate = 1; # next unallocated state.
while (<L>) {
@A = split(" ", $_);
@A == 0 && die "Empty lexicon line.";
foreach $a (@A) {
if ($a eq "<eps>") {
die "Bad lexicon line $_ (<eps> is forbidden)";
}
}
$w = shift @A;
if (! $pron_probs) {
$pron_cost = 0.0;
} else {
$pron_prob = shift @A;
if (! defined $pron_prob || !($pron_prob > 0.0 && $pron_prob <= 1.0)) {
die "Bad pronunciation probability in line $_";
}
$pron_cost = -log($pron_prob);
}
if ($pron_cost != 0.0) { $pron_cost_string = "\t$pron_cost"; } else { $pron_cost_string = ""; }
$s = $loopstate;
$word_or_eps = $w;
while (@A > 0) {
$p = shift @A;
if (@A > 0) {
$ns = $nextstate++;
} else {
$ns = $loopstate;
}
print "$s\t$ns\t$p\t$word_or_eps$pron_cost_string\n";
$word_or_eps = "<eps>";
$pron_cost_string = ""; # so we only print it on the first arc of the word.
$s = $ns;
}
}
print "$loopstate\t0\n"; # final-cost.
} else { # have silence probs.
$startstate = 0;
$loopstate = 1;
$silstate = 2; # state from where we go to loopstate after emitting silence.
print "$startstate\t$loopstate\t<eps>\t<eps>\t$nosilcost\n"; # no silence.
if (!defined $sildisambig) {
print "$startstate\t$loopstate\t$silphone\t<eps>\t$silcost\n"; # silence.
print "$silstate\t$loopstate\t$silphone\t<eps>\n"; # no cost.
$nextstate = 3;
} else {
$disambigstate = 3;
$nextstate = 4;
print "$startstate\t$disambigstate\t$silphone\t<eps>\t$silcost\n"; # silence.
print "$silstate\t$disambigstate\t$silphone\t<eps>\n"; # no cost.
print "$disambigstate\t$loopstate\t$sildisambig\t<eps>\n"; # silence disambiguation symbol.
}
while (<L>) {
@A = split(" ", $_);
$w = shift @A;
if (! $pron_probs) {
$pron_cost = 0.0;
} else {
$pron_prob = shift @A;
if (! defined $pron_prob || !($pron_prob > 0.0 && $pron_prob <= 1.0)) {
die "Bad pronunciation probability in line $_";
}
$pron_cost = -log($pron_prob);
}
if ($pron_cost != 0.0) { $pron_cost_string = "\t$pron_cost"; } else { $pron_cost_string = ""; }
$s = $loopstate;
$word_or_eps = $w;
while (@A > 0) {
$p = shift @A;
if (@A > 0) {
$ns = $nextstate++;
print "$s\t$ns\t$p\t$word_or_eps$pron_cost_string\n";
$word_or_eps = "<eps>";
$pron_cost_string = ""; $pron_cost = 0.0; # so we only print it the 1st time.
$s = $ns;
} elsif (!defined($silphone) || $p ne $silphone) {
# This is non-deterministic but relatively compact,
# and avoids epsilons.
$local_nosilcost = $nosilcost + $pron_cost;
$local_silcost = $silcost + $pron_cost;
print "$s\t$loopstate\t$p\t$word_or_eps\t$local_nosilcost\n";
print "$s\t$silstate\t$p\t$word_or_eps\t$local_silcost\n";
} else {
# no point putting opt-sil after silence word.
print "$s\t$loopstate\t$p\t$word_or_eps$pron_cost_string\n";
}
}
}
print "$loopstate\t0\n"; # final-cost.
}

@ -0,0 +1,38 @@
#!/bin/bash
#
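# Expected invocation (as in the ds2 example run script added in this commit):
#   tools/fst/make_tlg.sh <lm-dir> <src-lang-dir> <tgt-lang-dir>
# where <lm-dir> contains lm.arpa and <src-lang-dir> contains the words.txt,
# T.fst and L.fst produced by compile_lexicon_token_fst.sh.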
if [ -f path.sh ]; then . path.sh; fi
lm_dir=$1
src_lang=$2
tgt_lang=$3
arpa_lm=${lm_dir}/lm.arpa
[ ! -f $arpa_lm ] && echo "No such file $arpa_lm" && exit 1;
rm -rf $tgt_lang
cp -r $src_lang $tgt_lang
# Compose the language model to FST
cat $arpa_lm | \
grep -v '<s> <s>' | \
grep -v '</s> <s>' | \
grep -v '</s> </s>' | \
grep -v -i '<unk>' | \
grep -v -i '<spoken_noise>' | \
arpa2fst --read-symbol-table=$tgt_lang/words.txt --keep-symbols=true - | fstprint | \
tools/fst/eps2disambig.pl | tools/fst/s2eps.pl | fstcompile --isymbols=$tgt_lang/words.txt \
--osymbols=$tgt_lang/words.txt --keep_isymbols=false --keep_osymbols=false | \
fstrmepsilon | fstarcsort --sort_type=ilabel > $tgt_lang/G.fst
echo "Checking how stochastic G is (the first of these numbers should be small):"
fstisstochastic $tgt_lang/G.fst
# Compose the token, lexicon and language-model FST into the final decoding graph
fsttablecompose $tgt_lang/L.fst $tgt_lang/G.fst | fstdeterminizestar --use-log=true | \
fstminimizeencoded | fstarcsort --sort_type=ilabel > $tgt_lang/LG.fst || exit 1;
fsttablecompose $tgt_lang/T.fst $tgt_lang/LG.fst > $tgt_lang/TLG.fst || exit 1;
echo "Composing decoding graph TLG.fst succeeded"
#rm -r $tgt_lang/LG.fst # We don't need to keep this intermediate FST

@ -0,0 +1,64 @@
#!/usr/bin/env python3
# encoding: utf-8
import sys
# sys.argv[1]: e2e model unit file(lang_char.txt)
# sys.argv[2]: raw lexicon file
# sys.argv[3]: output lexicon file
# sys.argv[4]: bpemodel
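# Example invocation (taken from the ds2 run script in this commit; the 4th
# bpemodel argument is optional and only used for BPE units):
#   tools/fst/prepare_dict.py $unit_file ${data}/resource_aishell/lexicon.txt \
#       $dir/local/dict/lexicon.txt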
unit_table = set()
with open(sys.argv[1], 'r', encoding='utf8') as fin:
    for line in fin:
        unit = line.split()[0]
        unit_table.add(unit)


def contain_oov(units):
    for unit in units:
        if unit not in unit_table:
            return True
    return False


bpemode = len(sys.argv) > 4
if bpemode:
    import sentencepiece as spm
    sp = spm.SentencePieceProcessor()
    sp.Load(sys.argv[4])

lexicon_table = set()
with open(sys.argv[2], 'r', encoding='utf8') as fin, \
        open(sys.argv[3], 'w', encoding='utf8') as fout:
    for line in fin:
        word = line.split()[0]
        if word == 'SIL' and not bpemode:  # `sil` might be a valid piece in bpemodel
            continue
        elif word == '<SPOKEN_NOISE>':
            continue
        else:
            # each word only has one pronunciation for the e2e system
            if word in lexicon_table:
                continue
            if bpemode:
                pieces = sp.EncodeAsPieces(word)
                if contain_oov(pieces):
                    print(
                        'Ignoring word {}, which contains an oov unit'.format(
                            ''.join(word).strip('▁'))
                    )
                    continue
                chars = ' '.join(
                    [p if p in unit_table else '<unk>' for p in pieces])
            else:
                # ignore words with OOV units
                if contain_oov(word):
                    print('Ignoring word {}, which contains an oov unit'.format(word))
                    continue
                # Optional: prepend ▁ to an English word
                # (we assume the model unit of our e2e system is char now).
                if word.encode('utf8').isalpha() and '▁' in unit_table:
                    word = '▁' + word
                chars = ' '.join(word)  # word is a char list
            fout.write('{} {}\n'.format(word, chars))
            lexicon_table.add(word)

@ -0,0 +1,43 @@
#!/usr/bin/env perl
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# This script removes lines that contain these OOVs on either the
# third or fourth field of the line. It is intended to remove arcs
# with OOVs on, from FSTs (probably compiled from ARPAs with OOVs in).
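# For example, if unk_list.txt contains "<unk>", a printed-FST arc line such as
#   0  1  <unk>  <unk>  2.5
# is dropped, while lines whose third and fourth fields are both in-vocabulary
# pass through unchanged.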
if ( @ARGV < 1 && @ARGV > 2) {
die "Usage: remove_oovs.pl unk_list.txt [ printed-fst ]\n";
}
$unklist = shift @ARGV;
open(S, "<$unklist") || die "Failed opening unknown-symbol list $unklist\n";
while(<S>){
@A = split(" ", $_);
@A == 1 || die "Bad line in unknown-symbol list: $_";
$unk{$A[0]} = 1;
}
$num_removed = 0;
while(<>){
@A = split(" ", $_);
if(defined $unk{$A[2]} || defined $unk{$A[3]}) {
$num_removed++;
} else {
print;
}
}
print STDERR "remove_oovs.pl: removed $num_removed lines.\n";

@ -0,0 +1,17 @@
#!/usr/bin/env python
import sys
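# Prints a single-state token FST in OpenFst text format ("src dst ilabel olabel"):
# <blank> maps to <eps> on a self-loop, every real unit is an identity self-loop
# (so repeated labels are not collapsed here), and disambiguation symbols are
# output on <eps>-input self-loops; state 0 is the only, and final, state.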
print('0 0 <blank> <eps>')
with open(sys.argv[1], 'r', encoding='utf8') as fin:
    for entry in fin:
        fields = entry.strip().split(' ')
        phone = fields[0]
        if phone == '<eps>' or phone == '<blank>':
            continue
        elif '#' in phone:  # disambiguation symbol
            print('{} {} {} {}'.format(0, 0, '<eps>', phone))
        else:
            print('{} {} {} {}'.format(0, 0, phone, phone))
print('0')

@ -0,0 +1,27 @@
#!/usr/bin/env perl
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# This script replaces <s> and </s> with <eps> (on both input and output sides),
# for the G.fst acceptor.
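# For example, the printed-G.fst arc
#   0  1  <s>  <s>
# becomes
#   0  1  <eps>  <eps>
# while any weight field is left untouched.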
while(<>){
@A = split(" ", $_);
if ( @A >= 4 ) {
if ($A[2] eq "<s>" || $A[2] eq "</s>") { $A[2] = "<eps>"; }
if ($A[3] eq "<s>" || $A[3] eq "</s>") { $A[3] = "<eps>"; }
}
print join("\t", @A) . "\n";
}

@ -0,0 +1,97 @@
#!/bin/bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey);
# Arnab Ghoshal, Karel Vesely
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Parse command-line options.
# To be sourced by another script (as in ". parse_options.sh").
# Option format is: --option-name arg
# and shell variable "option_name" gets set to value "arg."
# The exception is --help, which takes no arguments, but prints the
# $help_message variable (if defined).
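# Illustrative use (the variable name is hypothetical): a script that sets
#   stage=0
# and then sources this file can be called as "./run.sh --stage 2", which sets
# $stage to 2; an option whose variable was never declared triggers an
# "invalid option" error and exits.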
###
### The --config file options have lower priority to command line
### options, so we need to import them first...
###
# Now import all the configs specified by command-line, in left-to-right order
for ((argpos=1; argpos<$#; argpos++)); do
if [ "${!argpos}" == "--config" ]; then
argpos_plus1=$((argpos+1))
config=${!argpos_plus1}
[ ! -r $config ] && echo "$0: missing config '$config'" && exit 1
. $config # source the config file.
fi
done
###
### Now we process the command line options
###
while true; do
[ -z "${1:-}" ] && break; # break if there are no arguments
case "$1" in
# If the enclosing script is called with --help option, print the help
# message and exit. Scripts should put help messages in $help_message
--help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2;
else printf "$help_message\n" 1>&2 ; fi;
exit 0 ;;
--*=*) echo "$0: options to scripts must be of the form --name value, got '$1'"
exit 1 ;;
# If the first command-line argument begins with "--" (e.g. --foo-bar),
# then work out the variable name as $name, which will equal "foo_bar".
--*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`;
# Next we test whether the variable in question is undefined-- if so it's
# an invalid option and we die. Note: $0 evaluates to the name of the
# enclosing script.
# The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar
# is undefined. We then have to wrap this test inside "eval" because
# foo_bar is itself inside a variable ($name).
eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1;
oldval="`eval echo \\$$name`";
# Work out whether we seem to be expecting a Boolean argument.
if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then
was_bool=true;
else
was_bool=false;
fi
# Set the variable to the right value-- the escaped quotes make it work if
# the option had spaces, like --cmd "queue.pl -sync y"
eval $name=\"$2\";
# Check that Boolean-valued arguments are really Boolean.
if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then
echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
exit 1;
fi
shift 2;
;;
*) break;
esac
done
# Check for an empty argument to the --cmd option, which can easily occur as a
# result of scripting errors.
[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1;
true; # so this script returns exit code 0.

@ -27,6 +27,8 @@ DEFINE_string(model_path, "avg_1.jit.pdmodel", "paddle nnet model");
DEFINE_string(param_path, "avg_1.jit.pdiparams", "paddle nnet model param");
DEFINE_string(word_symbol_table, "vocab.txt", "word symbol table");
DEFINE_string(graph_path, "TLG", "decoder graph");
DEFINE_double(acoustic_scale, 1.0, "acoustic scale");
DEFINE_int32(max_active, 5000, "max active states in decoding");
using kaldi::BaseFloat;
@ -49,11 +51,13 @@ int main(int argc, char* argv[]) {
ppspeech::TLGDecoderOptions opts;
opts.word_symbol_table = word_symbol_table;
opts.fst_path = graph_path;
opts.opts.max_active = FLAGS_max_active;
ppspeech::TLGDecoder decoder(opts);
ppspeech::ModelOptions model_opts;
model_opts.model_path = model_graph;
model_opts.params_path = model_params;
model_opts.cache_shape = "5-1-1024,5-1-1024";
std::shared_ptr<ppspeech::PaddleNnet> nnet(
new ppspeech::PaddleNnet(model_opts));
std::shared_ptr<ppspeech::RawDataCache> raw_data(

@ -107,7 +107,7 @@ void CMVN::Accept(const kaldi::VectorBase<kaldi::BaseFloat>& inputs) {
}
bool CMVN::Read(kaldi::Vector<BaseFloat>* feats) {
- if (base_extractor_->Read(feats) == false) {
+ if (base_extractor_->Read(feats) == false || feats->Dim() == 0) {
return false;
}
Compute(feats);

@ -36,9 +36,9 @@ int32 Decodable::NumFramesReady() const {
}
bool Decodable::IsLastFrame(int32 frame) {
- CHECK_LE(frame, frames_ready_);
bool flag = EnsureFrameHaveComputed(frame);
- return (flag == false) && (frame == frames_ready_ - 1);
+ //CHECK_LE(frame, frames_ready_);
+ return (flag == false) && (frame == frames_ready_);
}
int32 Decodable::NumIndices() const { return 0; }

@ -0,0 +1,100 @@
// fstbin/fstaddselfloops.cc
// Copyright 2009-2011 Microsoft Corporation
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "base/kaldi-common.h"
#include "fst/fstlib.h"
#include "fstext/determinize-star.h"
#include "fstext/fstext-utils.h"
#include "fstext/kaldi-fst-io.h"
#include "util/parse-options.h"
#include "util/simple-io-funcs.h"
/* some test examples:
pushd ~/tmpdir
( echo 3; echo 4) > in.list
( echo 5; echo 6) > out.list
( echo "0 0 0 0"; echo "0 0" ) | fstcompile | fstaddselfloops in.list out.list
| fstprint ( echo "0 1 0 1"; echo " 0 2 1 0"; echo "1 0"; echo "2 0"; ) |
fstcompile | fstaddselfloops in.list out.list | fstprint
*/
int main(int argc, char *argv[]) {
try {
using namespace kaldi; // NOLINT
using namespace fst; // NOLINT
using kaldi::int32;
const char *usage =
"Adds self-loops to states of an FST to propagate disambiguation "
"symbols through it\n"
"They are added on each final state and each state with non-epsilon "
"output symbols\n"
"on at least one arc out of the state. Useful in conjunction with "
"predeterminize\n"
"\n"
"Usage: fstaddselfloops in-disambig-list out-disambig-list [in.fst "
"[out.fst] ]\n"
"E.g: fstaddselfloops in.list out.list < in.fst > withloops.fst\n"
"in.list and out.list are lists of integers, one per line, of the\n"
"same length.\n";
ParseOptions po(usage);
po.Read(argc, argv);
if (po.NumArgs() < 2 || po.NumArgs() > 4) {
po.PrintUsage();
exit(1);
}
std::string disambig_in_rxfilename = po.GetArg(1),
disambig_out_rxfilename = po.GetArg(2),
fst_in_filename = po.GetOptArg(3),
fst_out_filename = po.GetOptArg(4);
VectorFst<StdArc> *fst = ReadFstKaldi(fst_in_filename);
std::vector<int32> disambig_in;
if (!ReadIntegerVectorSimple(disambig_in_rxfilename, &disambig_in))
KALDI_ERR
<< "fstaddselfloops: Could not read disambiguation symbols from "
<< kaldi::PrintableRxfilename(disambig_in_rxfilename);
std::vector<int32> disambig_out;
if (!ReadIntegerVectorSimple(disambig_out_rxfilename, &disambig_out))
KALDI_ERR
<< "fstaddselfloops: Could not read disambiguation symbols from "
<< kaldi::PrintableRxfilename(disambig_out_rxfilename);
if (disambig_in.size() != disambig_out.size())
KALDI_ERR
<< "fstaddselfloops: mismatch in size of disambiguation symbols";
AddSelfLoops(fst, disambig_in, disambig_out);
WriteFstKaldi(*fst, fst_out_filename);
delete fst;
return 0;
} catch (const std::exception &e) {
std::cerr << e.what();
return -1;
}
return 0;
}

@ -0,0 +1,114 @@
// fstbin/fstdeterminizestar.cc
// Copyright 2009-2011 Microsoft Corporation
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "base/kaldi-common.h"
#include "fst/fstlib.h"
#include "fstext/determinize-star.h"
#include "fstext/fstext-utils.h"
#include "fstext/kaldi-fst-io.h"
#include "util/parse-options.h"
#if !defined(_MSC_VER) && !defined(__APPLE__)
#include <signal.h> // Comment this line and the call to signal below if
// it causes compilation problems. It is only to enable a debugging procedure
// when determinization does not terminate. We are disabling this code if
// compiling on Windows because signal.h is not available there, and on
// MacOS due to a problem with <signal.h> in the initial release of Sierra.
#endif
/* some test examples:
( echo "0 0 0 0"; echo "0 0" ) | fstcompile | fstdeterminizestar | fstprint
( echo "0 0 1 0"; echo "0 0" ) | fstcompile | fstdeterminizestar | fstprint
( echo "0 0 1 0"; echo "0 1 1 0"; echo "0 0" ) | fstcompile |
fstdeterminizestar | fstprint # this last one fails [correctly]: ( echo "0 0 0
1"; echo "0 0" ) | fstcompile | fstdeterminizestar | fstprint
cd ~/tmpdir
while true; do
fstrand > 1.fst
fstpredeterminize out.lst 1.fst | fstdeterminizestar | fstrmsymbols out.lst
> 2.fst fstequivalent --random=true 1.fst 2.fst || echo "Test failed" echo -n
"." done
Test of debugging [with non-determinizable input]:
( echo " 0 0 1 0 1.0"; echo "0 1 1 0"; echo "1 1 1 0 0"; echo "0 2 2 0"; echo
"2"; echo "1" ) | fstcompile | fstdeterminizestar kill -SIGUSR1 [the process-id
of fstdeterminizestar] # prints out a bunch of debugging output showing the
mess it got itself into.
*/
bool debug_location = false;
void signal_handler(int) { debug_location = true; }
int main(int argc, char *argv[]) {
try {
using namespace kaldi; // NOLINT
using namespace fst; // NOLINT
using kaldi::int32;
const char *usage =
"Removes epsilons and determinizes in one step\n"
"\n"
"Usage: fstdeterminizestar [in.fst [out.fst] ]\n"
"\n"
"See also: fstdeterminizelog, lattice-determinize\n";
float delta = kDelta;
int max_states = -1;
bool use_log = false;
ParseOptions po(usage);
po.Register("use-log", &use_log, "Determinize in log semiring.");
po.Register("delta", &delta,
"Delta value used to determine equivalence of weights.");
po.Register(
"max-states", &max_states,
"Maximum number of states in determinized FST before it will abort.");
po.Read(argc, argv);
if (po.NumArgs() > 2) {
po.PrintUsage();
exit(1);
}
std::string fst_in_str = po.GetOptArg(1), fst_out_str = po.GetOptArg(2);
// This enables us to get traceback info from determinization that is
// not seeming to terminate.
#if !defined(_MSC_VER) && !defined(__APPLE__)
signal(SIGUSR1, signal_handler);
#endif
// Normal case: just files.
VectorFst<StdArc> *fst = ReadFstKaldi(fst_in_str);
ArcSort(fst, ILabelCompare<StdArc>()); // improves speed.
if (use_log) {
DeterminizeStarInLog(fst, delta, &debug_location, max_states);
} else {
VectorFst<StdArc> det_fst;
DeterminizeStar(*fst, &det_fst, delta, &debug_location, max_states);
*fst = det_fst; // will do shallow copy and then det_fst goes
// out of scope anyway.
}
WriteFstKaldi(*fst, fst_out_str);
delete fst;
return 0;
} catch (const std::exception &e) {
std::cerr << e.what();
return -1;
}
}

@ -0,0 +1,91 @@
// fstbin/fstisstochastic.cc
// Copyright 2009-2011 Microsoft Corporation
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "base/kaldi-common.h"
#include "fst/fstlib.h"
#include "fstext/fstext-utils.h"
#include "fstext/kaldi-fst-io.h"
#include "util/kaldi-io.h"
#include "util/parse-options.h"
// e.g. of test:
// echo " 0 0" | fstcompile | fstisstochastic
// should return 0 and print "0 0" [meaning, min and
// max weight are one = exp(0)]
// echo " 0 1" | fstcompile | fstisstochastic
// should return 1, not stochastic, and print 1 1
// (echo "0 0 0 0 0.693147 "; echo "0 1 0 0 0.693147 "; echo "1 0" ) |
// fstcompile | fstisstochastic should return 0, stochastic; it prints "0
// -1.78e-07" for me (echo "0 0 0 0 0.693147 "; echo "0 1 0 0 0.693147 "; echo
// "1 0" ) | fstcompile | fstisstochastic --test-in-log=false should return 1,
// not stochastic in tropical; it prints "0 0.693147" for me (echo "0 0 0 0 0 ";
// echo "0 1 0 0 0 "; echo "1 0" ) | fstcompile | fstisstochastic
// --test-in-log=false should return 0, stochastic in tropical; it prints "0 0"
// for me (echo "0 0 0 0 0.693147 "; echo "0 1 0 0 0.693147 "; echo "1 0" ) |
// fstcompile | fstisstochastic --test-in-log=false --delta=1 returns 0 even
// though not stochastic because we gave it an absurdly large delta.
int main(int argc, char *argv[]) {
try {
using namespace kaldi; // NOLINT
using namespace fst; // NOLINT
using kaldi::int32;
const char *usage =
"Checks whether an FST is stochastic and exits with success if so.\n"
"Prints out maximum error (in log units).\n"
"\n"
"Usage: fstisstochastic [ in.fst ]\n";
float delta = 0.01;
bool test_in_log = true;
ParseOptions po(usage);
po.Register("delta", &delta, "Maximum error to accept.");
po.Register("test-in-log", &test_in_log,
"Test stochasticity in log semiring.");
po.Read(argc, argv);
if (po.NumArgs() > 1) {
po.PrintUsage();
exit(1);
}
std::string fst_in_filename = po.GetOptArg(1);
Fst<StdArc> *fst = ReadFstKaldiGeneric(fst_in_filename);
bool ans;
StdArc::Weight min, max;
if (test_in_log)
ans = IsStochasticFstInLog(*fst, delta, &min, &max);
else
ans = IsStochasticFst(*fst, delta, &min, &max);
std::cout << min.Value() << " " << max.Value() << '\n';
delete fst;
if (ans)
return 0; // success;
else
return 1;
} catch (const std::exception &e) {
std::cerr << e.what();
return -1;
}
}

@ -0,0 +1,74 @@
// fstbin/fstminimizeencoded.cc
// Copyright 2009-2011 Microsoft Corporation
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "base/kaldi-common.h"
#include "fst/fstlib.h"
#include "fstext/determinize-star.h"
#include "fstext/fstext-utils.h"
#include "fstext/kaldi-fst-io.h"
#include "util/kaldi-io.h"
#include "util/parse-options.h"
#include "util/text-utils.h"
/* some test examples:
( echo "0 0 0 0"; echo "0 0" ) | fstcompile | fstminimizeencoded | fstprint
( echo "0 1 0 0"; echo " 0 2 0 0"; echo "1 0"; echo "2 0"; ) | fstcompile |
fstminimizeencoded | fstprint
*/
int main(int argc, char *argv[]) {
try {
using namespace kaldi; // NOLINT
using namespace fst; // NOLINT
using kaldi::int32;
const char *usage =
"Minimizes FST after encoding [similar to fstminimize, but no "
"weight-pushing]\n"
"\n"
"Usage: fstminimizeencoded [in.fst [out.fst] ]\n";
float delta = kDelta;
ParseOptions po(usage);
po.Register("delta", &delta,
"Delta likelihood used for quantization of weights");
po.Read(argc, argv);
if (po.NumArgs() > 2) {
po.PrintUsage();
exit(1);
}
std::string fst_in_filename = po.GetOptArg(1),
fst_out_filename = po.GetOptArg(2);
VectorFst<StdArc> *fst = ReadFstKaldi(fst_in_filename);
MinimizeEncoded(fst, delta);
WriteFstKaldi(*fst, fst_out_filename);
delete fst;
return 0;
} catch (const std::exception &e) {
std::cerr << e.what();
return -1;
}
return 0;
}

@ -0,0 +1,133 @@
// fstbin/fsttablecompose.cc
// Copyright 2009-2011 Microsoft Corporation
// 2013 Johns Hopkins University (author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "base/kaldi-common.h"
#include "fst/fstlib.h"
#include "fstext/fstext-utils.h"
#include "fstext/kaldi-fst-io.h"
#include "fstext/table-matcher.h"
#include "util/parse-options.h"
/*
cd ~/tmpdir
while true; do
fstrand | fstarcsort --sort_type=olabel > 1.fst
fstrand | fstarcsort > 2.fst
fstcompose 1.fst 2.fst > 3a.fst
fsttablecompose 1.fst 2.fst > 3b.fst
fstequivalent --random=true 3a.fst 3b.fst || echo "Test failed"
echo -n "."
done
*/
int main(int argc, char *argv[]) {
try {
using namespace kaldi; // NOLINT
using namespace fst; // NOLINT
using kaldi::int32;
/*
fsttablecompose should always give equivalent results to compose,
but it is more efficient for certain kinds of inputs.
In particular, it is useful when, say, the left FST has states
that typically either have epsilon olabels, or
one transition out for each of the possible symbols (as the
olabel). The same with the input symbols of the right-hand FST
is possible.
*/
const char *usage =
"Composition algorithm [between two FSTs of standard type, in "
"tropical\n"
"semiring] that is more efficient for certain cases-- in particular,\n"
"where one of the FSTs (the left one, if --match-side=left) has large\n"
"out-degree\n"
"\n"
"Usage: fsttablecompose (fst1-rxfilename|fst1-rspecifier) "
"(fst2-rxfilename|fst2-rspecifier) [(out-rxfilename|out-rspecifier)]\n";
ParseOptions po(usage);
TableComposeOptions opts;
std::string match_side = "left";
std::string compose_filter = "sequence";
po.Register("connect", &opts.connect, "If true, trim FST before output.");
po.Register("match-side", &match_side,
"Side of composition to do table "
"match, one of: \"left\" or \"right\".");
po.Register("compose-filter", &compose_filter,
"Composition filter to use, "
"one of: \"alt_sequence\", \"auto\", \"match\", \"sequence\"");
po.Read(argc, argv);
if (match_side == "left") {
opts.table_match_type = MATCH_OUTPUT;
} else if (match_side == "right") {
opts.table_match_type = MATCH_INPUT;
} else {
KALDI_ERR << "Invalid match-side option: " << match_side;
}
if (compose_filter == "alt_sequence") {
opts.filter_type = ALT_SEQUENCE_FILTER;
} else if (compose_filter == "auto") {
opts.filter_type = AUTO_FILTER;
} else if (compose_filter == "match") {
opts.filter_type = MATCH_FILTER;
} else if (compose_filter == "sequence") {
opts.filter_type = SEQUENCE_FILTER;
} else {
KALDI_ERR << "Invalid compose-filter option: " << compose_filter;
}
if (po.NumArgs() < 2 || po.NumArgs() > 3) {
po.PrintUsage();
exit(1);
}
std::string fst1_in_str = po.GetArg(1), fst2_in_str = po.GetArg(2),
fst_out_str = po.GetOptArg(3);
VectorFst<StdArc> *fst1 = ReadFstKaldi(fst1_in_str);
VectorFst<StdArc> *fst2 = ReadFstKaldi(fst2_in_str);
// Checks if <fst1> is olabel sorted and <fst2> is ilabel sorted.
if (fst1->Properties(fst::kOLabelSorted, true) == 0) {
KALDI_WARN << "The first FST is not olabel sorted.";
}
if (fst2->Properties(fst::kILabelSorted, true) == 0) {
KALDI_WARN << "The second FST is not ilabel sorted.";
}
VectorFst<StdArc> composed_fst;
TableCompose(*fst1, *fst2, &composed_fst, opts);
delete fst1;
delete fst2;
WriteFstKaldi(composed_fst, fst_out_str);
return 0;
} catch (const std::exception &e) {
std::cerr << e.what();
return -1;
}
}

@ -0,0 +1,97 @@
#!/usr/bin/env bash
current_path=`pwd`
current_dir=`basename "$current_path"`
if [ "tools" != "$current_dir" ]; then
echo "You should run this script in tools/ directory!!"
exit 1
fi
if [ ! -d liblbfgs-1.10 ]; then
echo Installing libLBFGS library to support MaxEnt LMs
bash extras/install_liblbfgs.sh || exit 1
fi
! command -v gawk > /dev/null && \
echo "GNU awk is not installed so SRILM will probably not work correctly: refusing to install" && exit 1;
if [ $# -ne 3 ]; then
echo "SRILM download requires some information about you"
echo
echo "Usage: $0 <name> <organization> <email>"
exit 1
fi
srilm_url="http://www.speech.sri.com/projects/srilm/srilm_download.php"
post_data="WWW_file=srilm-1.7.3.tar.gz&WWW_name=$1&WWW_org=$2&WWW_email=$3"
if ! wget --post-data "$post_data" -O ./srilm.tar.gz "$srilm_url"; then
echo 'There was a problem downloading the file.'
echo 'Check your internet connection and try again.'
exit 1
fi
mkdir -p srilm
cd srilm
if [ -f ../srilm.tgz ]; then
tar -xvzf ../srilm.tgz # Old SRILM format
elif [ -f ../srilm.tar.gz ]; then
tar -xvzf ../srilm.tar.gz # Changed format type from tgz to tar.gz
fi
major=`gawk -F. '{ print $1 }' RELEASE`
minor=`gawk -F. '{ print $2 }' RELEASE`
micro=`gawk -F. '{ print $3 }' RELEASE`
if [ $major -le 1 ] && [ $minor -le 7 ] && [ $micro -le 1 ]; then
echo "Detected version 1.7.1 or earlier. Applying patch."
patch -p0 < ../extras/srilm.patch
fi
# set the SRILM variable in the top-level Makefile to this directory.
cp Makefile tmpf
cat tmpf | gawk -v pwd=`pwd` '/SRILM =/{printf("SRILM = %s\n", pwd); next;} {print;}' \
> Makefile || exit 1
rm tmpf
mtype=`sbin/machine-type`
echo HAVE_LIBLBFGS=1 >> common/Makefile.machine.$mtype
grep ADDITIONAL_INCLUDES common/Makefile.machine.$mtype | \
sed 's|$| -I$(SRILM)/../liblbfgs-1.10/include|' \
>> common/Makefile.machine.$mtype
grep ADDITIONAL_LDFLAGS common/Makefile.machine.$mtype | \
sed 's|$| -L$(SRILM)/../liblbfgs-1.10/lib/ -Wl,-rpath -Wl,$(SRILM)/../liblbfgs-1.10/lib/|' \
>> common/Makefile.machine.$mtype
make || exit
cd ..
(
[ ! -z "${SRILM}" ] && \
echo >&2 "SRILM variable is aleady defined. Undefining..." && \
unset SRILM
[ -f ./env.sh ] && . ./env.sh
[ ! -z "${SRILM}" ] && \
echo >&2 "SRILM config is already in env.sh" && exit
wd=`pwd`
wd=`readlink -f $wd || pwd`
echo "export SRILM=$wd/srilm"
dirs="\${PATH}"
for directory in $(cd srilm && find bin -type d ) ; do
dirs="$dirs:\${SRILM}/$directory"
done
echo "export PATH=$dirs"
) >> env.sh
echo >&2 "Installation of SRILM finished successfully"
echo >&2 "Please source the tools/env.sh in your path.sh to enable it"

@ -0,0 +1,5 @@
cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
add_executable(arpa2fst ${CMAKE_CURRENT_SOURCE_DIR}/arpa2fst.cc)
target_include_directories(arpa2fst PRIVATE ${SPEECHX_ROOT} ${SPEECHX_ROOT}/kaldi)
target_link_libraries(arpa2fst )

@ -0,0 +1,145 @@
// bin/arpa2fst.cc
//
// Copyright 2009-2011 Gilles Boulianne.
//
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include <string>
#include "lm/arpa-lm-compiler.h"
#include "util/kaldi-io.h"
#include "util/parse-options.h"
int main(int argc, char *argv[]) {
using namespace kaldi; // NOLINT
try {
const char *usage =
"Convert an ARPA format language model into an FST\n"
"Usage: arpa2fst [opts] <input-arpa> <output-fst>\n"
" e.g.: arpa2fst --disambig-symbol=#0 --read-symbol-table="
"data/lang/words.txt lm/input.arpa G.fst\n\n"
"Note: When called without switches, the output G.fst will contain\n"
"an embedded symbol table. This is compatible with the way a previous\n"
"version of arpa2fst worked.\n";
ParseOptions po(usage);
ArpaParseOptions options;
options.Register(&po);
// Option flags.
std::string bos_symbol = "<s>";
std::string eos_symbol = "</s>";
std::string disambig_symbol;
std::string read_syms_filename;
std::string write_syms_filename;
bool keep_symbols = false;
bool ilabel_sort = true;
po.Register("bos-symbol", &bos_symbol, "Beginning of sentence symbol");
po.Register("eos-symbol", &eos_symbol, "End of sentence symbol");
po.Register("disambig-symbol", &disambig_symbol,
"Disambiguator. If provided (e. g. #0), used on input side of "
"backoff links, and <s> and </s> are replaced with epsilons");
po.Register("read-symbol-table", &read_syms_filename,
"Use existing symbol table");
po.Register("write-symbol-table", &write_syms_filename,
"Write generated symbol table to a file");
po.Register("keep-symbols", &keep_symbols,
"Store symbol table with FST. Symbols always saved to FST if "
"symbol tables are neither read or written (otherwise symbols "
"would be lost entirely)");
po.Register("ilabel-sort", &ilabel_sort, "Ilabel-sort the output FST");
po.Read(argc, argv);
if (po.NumArgs() != 1 && po.NumArgs() != 2) {
po.PrintUsage();
exit(1);
}
std::string arpa_rxfilename = po.GetArg(1),
fst_wxfilename = po.GetOptArg(2);
int64 disambig_symbol_id = 0;
fst::SymbolTable *symbols;
if (!read_syms_filename.empty()) {
// Use existing symbols. Required symbols must be in the table.
kaldi::Input kisym(read_syms_filename);
symbols = fst::SymbolTable::ReadText(
kisym.Stream(), PrintableWxfilename(read_syms_filename));
if (symbols == NULL)
KALDI_ERR << "Could not read symbol table from file "
<< read_syms_filename;
options.oov_handling = ArpaParseOptions::kSkipNGram;
if (!disambig_symbol.empty()) {
disambig_symbol_id = symbols->Find(disambig_symbol);
if (disambig_symbol_id == -1) // fst::kNoSymbol
KALDI_ERR << "Symbol table " << read_syms_filename
<< " has no symbol for " << disambig_symbol;
}
} else {
// Create a new symbol table and populate it from ARPA file.
symbols = new fst::SymbolTable(PrintableWxfilename(fst_wxfilename));
options.oov_handling = ArpaParseOptions::kAddToSymbols;
symbols->AddSymbol("<eps>", 0);
if (!disambig_symbol.empty()) {
disambig_symbol_id = symbols->AddSymbol(disambig_symbol);
}
}
// Add or use existing BOS and EOS.
options.bos_symbol = symbols->AddSymbol(bos_symbol);
options.eos_symbol = symbols->AddSymbol(eos_symbol);
// If producing new (not reading existing) symbols and not saving them,
// need to keep symbols with FST, otherwise they would be lost.
if (read_syms_filename.empty() && write_syms_filename.empty())
keep_symbols = true;
// Actually compile LM.
KALDI_ASSERT(symbols != NULL);
ArpaLmCompiler lm_compiler(options, disambig_symbol_id, symbols);
{
Input ki(arpa_rxfilename);
lm_compiler.Read(ki.Stream());
}
// Sort the FST in-place if requested by options.
if (ilabel_sort) {
fst::ArcSort(lm_compiler.MutableFst(), fst::StdILabelCompare());
}
// Write symbols if requested.
if (!write_syms_filename.empty()) {
kaldi::Output kosym(write_syms_filename, false);
symbols->WriteText(kosym.Stream());
}
// Write LM FST.
bool write_binary = true, write_header = false;
kaldi::Output kofst(fst_wxfilename, write_binary, write_header);
fst::FstWriteOptions wopts(PrintableWxfilename(fst_wxfilename));
wopts.write_isymbols = wopts.write_osymbols = keep_symbols;
lm_compiler.Fst().Write(kofst.Stream(), wopts);
delete symbols;
} catch (const std::exception &e) {
std::cerr << e.what();
return -1;
}
}