From 5170ccf00db57264fd9a4eda6ef38c8096705a3b Mon Sep 17 00:00:00 2001 From: Yang Zhou Date: Mon, 28 Mar 2022 20:26:35 +0800 Subject: [PATCH] rm example/aishell --- .../aishell/local/aishell_train_lms.sh | 59 ------------------- speechx/examples/aishell/mkgraph.sh | 31 ---------- speechx/examples/aishell/path.sh | 14 ----- 3 files changed, 104 deletions(-) delete mode 100755 speechx/examples/aishell/local/aishell_train_lms.sh delete mode 100644 speechx/examples/aishell/mkgraph.sh delete mode 100644 speechx/examples/aishell/path.sh diff --git a/speechx/examples/aishell/local/aishell_train_lms.sh b/speechx/examples/aishell/local/aishell_train_lms.sh deleted file mode 100755 index 30ffb797..00000000 --- a/speechx/examples/aishell/local/aishell_train_lms.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/bash - - -# To be run from one directory above this script. -. ./path.sh - -text=data/local/lm/text -lexicon=data/local/dict/lexicon.txt - -for f in "$text" "$lexicon"; do - [ ! -f $x ] && echo "$0: No such file $f" && exit 1; -done - -# Check SRILM tools -if ! which ngram-count > /dev/null; then - echo "srilm tools are not found, please download it and install it from: " - echo "http://www.speech.sri.com/projects/srilm/download.html" - echo "Then add the tools to your PATH" - exit 1 -fi - -# This script takes no arguments. It assumes you have already run -# aishell_data_prep.sh. -# It takes as input the files -# data/local/lm/text -# data/local/dict/lexicon.txt -dir=data/local/lm -mkdir -p $dir - - -cleantext=$dir/text.no_oov - -cat $text | awk -v lex=$lexicon 'BEGIN{while((getline0){ seen[$1]=1; } } - {for(n=1; n<=NF;n++) { if (seen[$n]) { printf("%s ", $n); } else {printf(" ");} } printf("\n");}' \ - > $cleantext || exit 1; - -cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | sort | uniq -c | \ - sort -nr > $dir/word.counts || exit 1; - -# Get counts from acoustic training transcripts, and add one-count -# for each word in the lexicon (but not silence, we don't want it -# in the LM-- we'll add it optionally later). -cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | \ - cat - <(grep -w -v '!SIL' $lexicon | awk '{print $1}') | \ - sort | uniq -c | sort -nr > $dir/unigram.counts || exit 1; - -cat $dir/unigram.counts | awk '{print $2}' | cat - <(echo ""; echo "" ) > $dir/wordlist - -heldout_sent=10000 # Don't change this if you want result to be comparable with - # kaldi_lm results -mkdir -p $dir -cat $cleantext | awk '{for(n=2;n<=NF;n++){ printf $n; if(n $dir/heldout -cat $cleantext | awk '{for(n=2;n<=NF;n++){ printf $n; if(n $dir/train - -ngram-count -text $dir/train -order 3 -limit-vocab -vocab $dir/wordlist -unk \ - -map-unk "" -kndiscount -interpolate -lm $dir/lm.arpa -ngram -lm $dir/lm.arpa -ppl $dir/heldout diff --git a/speechx/examples/aishell/mkgraph.sh b/speechx/examples/aishell/mkgraph.sh deleted file mode 100644 index f66cd4dc..00000000 --- a/speechx/examples/aishell/mkgraph.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash - -. ./path.sh || exit 1; - -. tools/parse_options.sh || exit 1; - -data=/mnt/dataset/aishell - -# Optionally, you can add LM and test it with runtime. -dir=./ds2_graph -dict=$dir/vocab.txt -if [ ${stage} -le 7 ] && [ ${stop_stage} -ge 7 ]; then - # 7.1 Prepare dict - unit_file=$dict - mkdir -p $dir/local/dict - cp $unit_file $dir/local/dict/units.txt - tools/fst/prepare_dict.py $unit_file ${data}/resource_aishell/lexicon.txt \ - $dir/local/dict/lexicon.txt - # Train lm - lm=$dir/local/lm - mkdir -p $lm - tools/filter_scp.pl data/train/text \ - $data/data_aishell/transcript/aishell_transcript_v0.8.txt > $lm/text - local/ds2_aishell_train_lms.sh - # Build decoding TLG - tools/fst/compile_lexicon_token_fst.sh \ - $dir/local/dict $dir/local/tmp $dir/local/lang - tools/fst/make_tlg.sh $dir/local/lm $dir/local/lang $dir/lang_test || exit 1; -fi - - diff --git a/speechx/examples/aishell/path.sh b/speechx/examples/aishell/path.sh deleted file mode 100644 index 8ab7ee29..00000000 --- a/speechx/examples/aishell/path.sh +++ /dev/null @@ -1,14 +0,0 @@ -# This contains the locations of binarys build required for running the examples. - -SPEECHX_ROOT=$PWD/../.. -SPEECHX_EXAMPLES=$SPEECHX_ROOT/build/examples - -SPEECHX_TOOLS=$SPEECHX_ROOT/tools -TOOLS_BIN=$SPEECHX_TOOLS/valgrind/install/bin - -[ -d $SPEECHX_EXAMPLES ] || { echo "Error: 'build/examples' directory not found. please ensure that the project build successfully"; } - -export LC_AL=C - -SPEECHX_BIN=$SPEECHX_EXAMPLES/feat -export PATH=$PATH:$SPEECHX_BIN:$TOOLS_BIN