fix the bug: read space as unk

pull/851/head
huangyuxin 3 years ago
parent 4739b5a02a
commit 264bba760b

@ -83,6 +83,8 @@ class TextFeaturizer():
tokens = self.tokenize(text) tokens = self.tokenize(text)
ids = [] ids = []
for token in tokens: for token in tokens:
if '' in self.vocab_dict and token == ' ':
token = ''
token = token if token in self.vocab_dict else self.unk token = token if token in self.vocab_dict else self.unk
ids.append(self.vocab_dict[token]) ids.append(self.vocab_dict[token])
return ids return ids

@ -2,3 +2,4 @@ exp
data data
*log *log
tmp tmp
nohup*

@ -1,4 +1,10 @@
#!/bin/bash #!/bin/bash
if [ $# != 1 ];then
echo "usage: ${0} ckpt_dir"
exit -1
fi
ckpt_dir=$1
stage=-1 stage=-1
stop_stage=100 stop_stage=100
@ -9,17 +15,16 @@ mkdir -p data
TARGET_DIR=${MAIN_ROOT}/examples/dataset TARGET_DIR=${MAIN_ROOT}/examples/dataset
mkdir -p ${TARGET_DIR} mkdir -p ${TARGET_DIR}
bash local/download_model.sh bash local/download_model.sh ${ckpt_dir}
if [ $? -ne 0 ]; then if [ $? -ne 0 ]; then
exit 1 exit 1
fi fi
cd ${ckpt_dir}
tar xzvf aishell_model_v1.8_to_v2.x.tar.gz tar xzvf aishell_model_v1.8_to_v2.x.tar.gz
mv aishell_v1.8.pdparams exp/deepspeech2/checkpoints/ cd -
mv README.md exp/deepspeech2/ mv ${ckpt_dir}/mean_std.npz data/
mv mean_std.npz data/ mv ${ckpt_dir}/vocab.txt data/
mv vocab.txt data/
rm aishell_model_v1.8_to_v2.x.tar.gz -f
if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
@ -39,27 +44,6 @@ if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
fi fi
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
# compute mean and stddev for normalizer
num_workers=$(nproc)
python3 ${MAIN_ROOT}/utils/compute_mean_std.py \
--manifest_path="data/manifest.train.raw" \
--specgram_type="linear" \
--delta_delta=false \
--stride_ms=10.0 \
--window_ms=20.0 \
--sample_rate=16000 \
--use_dB_normalization=True \
--num_samples=2000 \
--num_workers=${num_workers} \
--output_path="data/mean_std.json"
if [ $? -ne 0 ]; then
echo "Compute mean and stddev failed. Terminated."
exit 1
fi
fi
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
# format manifest with tokenids, vocab size # format manifest with tokenids, vocab size
@ -67,7 +51,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
{ {
python3 ${MAIN_ROOT}/utils/format_data.py \ python3 ${MAIN_ROOT}/utils/format_data.py \
--feat_type "raw" \ --feat_type "raw" \
--cmvn_path "data/mean_std.json" \ --cmvn_path "data/mean_std.npz" \
--unit_type "char" \ --unit_type "char" \
--vocab_path="data/vocab.txt" \ --vocab_path="data/vocab.txt" \
--manifest_path="data/manifest.${dataset}.raw" \ --manifest_path="data/manifest.${dataset}.raw" \

@ -1,10 +1,17 @@
#! /usr/bin/env bash #! /usr/bin/env bash
if [ $# != 1 ];then
echo "usage: ${0} ckpt_dir"
exit -1
fi
ckpt_dir=$1
. ${MAIN_ROOT}/utils/utility.sh . ${MAIN_ROOT}/utils/utility.sh
URL='https://deepspeech.bj.bcebos.com/mandarin_models/aishell_model_v1.8_to_v2.x.tar.gz' URL='https://deepspeech.bj.bcebos.com/mandarin_models/aishell_model_v1.8_to_v2.x.tar.gz'
MD5=4ade113c69ea291b8ce5ec6a03296659 MD5=4ade113c69ea291b8ce5ec6a03296659
TARGET=./aishell_model_v1.8_to_v2.x.tar.gz TARGET=${ckpt_dir}/aishell_model_v1.8_to_v2.x.tar.gz
echo "Download Aishell model ..." echo "Download Aishell model ..."
@ -13,7 +20,6 @@ if [ $? -ne 0 ]; then
echo "Fail to download Aishell model!" echo "Fail to download Aishell model!"
exit 1 exit 1
fi fi
tar -zxvf $TARGET
exit 0 exit 0

@ -12,5 +12,4 @@ export PYTHONPATH=${LOCAL_DEEPSPEECH2}:${PYTHONPATH}
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib/ export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib/
MODEL=deepspeech2 MODEL=deepspeech2
export BIN_DIR=${LOCAL_DEEPSPEECH2}/deepspeech2x/bin export BIN_DIR=${LOCAL_DEEPSPEECH2}/src_deepspeech2x/bin
echo "BIN_DIR "${BIN_DIR}

@ -17,11 +17,11 @@ echo "checkpoint name ${ckpt}"
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
# prepare data # prepare data
mkdir -p exp/${ckpt}/checkpoints mkdir -p exp/${ckpt}/checkpoints
bash ./local/data.sh || exit -1 bash ./local/data.sh exp/${ckpt}/checkpoints || exit -1
fi fi
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
# test ckpt avg_n # test ckpt avg_n
CUDA_VISIBLE_DEVICES=1 ./local/test.sh ${conf_path} exp/${ckpt}/checkpoints/${v18_ckpt} ${model_type}|| exit -1 CUDA_VISIBLE_DEVICES=2 ./local/test.sh ${conf_path} exp/${ckpt}/checkpoints/${v18_ckpt} ${model_type}|| exit -1
fi fi

@ -2,3 +2,4 @@ exp
data data
*log *log
tmp tmp
nohup*

@ -1,7 +1,14 @@
#!/bin/bash #!/bin/bash
if [ $# != 1 ];then
echo "usage: ${0} ckpt_dir"
exit -1
fi
ckpt_dir=$1
stage=-1 stage=-1
stop_stage=100 stop_stage=100
unit_type=char
source ${MAIN_ROOT}/utils/parse_options.sh source ${MAIN_ROOT}/utils/parse_options.sh
@ -10,17 +17,17 @@ TARGET_DIR=${MAIN_ROOT}/examples/dataset
mkdir -p ${TARGET_DIR} mkdir -p ${TARGET_DIR}
bash local/download_model.sh bash local/download_model.sh ${ckpt_dir}
if [ $? -ne 0 ]; then if [ $? -ne 0 ]; then
exit 1 exit 1
fi fi
cd ${ckpt_dir}
tar xzvf baidu_en8k_v1.8_to_v2.x.tar.gz tar xzvf baidu_en8k_v1.8_to_v2.x.tar.gz
mv baidu_en8k_v1.8.pdparams exp/deepspeech2/checkpoints/ cd -
mv README.md exp/deepspeech2/ mv ${ckpt_dir}/mean_std.npz data/
mv mean_std.npz data/ mv ${ckpt_dir}/vocab.txt data/
mv vocab.txt data/
rm baidu_en8k_v1.8_to_v2.x.tar.gz -f
if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
# download data, generate manifests # download data, generate manifests
@ -53,35 +60,13 @@ if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
fi fi
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
# compute mean and stddev for normalizer
num_workers=$(nproc)
python3 ${MAIN_ROOT}/utils/compute_mean_std.py \
--manifest_path="data/manifest.train.raw" \
--num_samples=2000 \
--specgram_type="linear" \
--delta_delta=false \
--sample_rate=16000 \
--stride_ms=10.0 \
--window_ms=20.0 \
--use_dB_normalization=True \
--num_workers=${num_workers} \
--output_path="data/mean_std.json"
if [ $? -ne 0 ]; then
echo "Compute mean and stddev failed. Terminated."
exit 1
fi
fi
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
# format manifest with tokenids, vocab size # format manifest with tokenids, vocab size
for set in train dev test dev-clean dev-other test-clean test-other; do for set in train dev test dev-clean dev-other test-clean test-other; do
{ {
python3 ${MAIN_ROOT}/utils/format_data.py \ python3 ${MAIN_ROOT}/utils/format_data.py \
--feat_type "raw" \ --feat_type "raw" \
--cmvn_path "data/mean_std.json" \ --cmvn_path "data/mean_std.npz" \
--unit_type ${unit_type} \ --unit_type ${unit_type} \
--vocab_path="data/vocab.txt" \ --vocab_path="data/vocab.txt" \
--manifest_path="data/manifest.${set}.raw" \ --manifest_path="data/manifest.${set}.raw" \

@ -1,10 +1,17 @@
#! /usr/bin/env bash #! /usr/bin/env bash
if [ $# != 1 ];then
echo "usage: ${0} ckpt_dir"
exit -1
fi
ckpt_dir=$1
. ${MAIN_ROOT}/utils/utility.sh . ${MAIN_ROOT}/utils/utility.sh
URL='https://deepspeech.bj.bcebos.com/eng_models/baidu_en8k_v1.8_to_v2.x.tar.gz' URL='https://deepspeech.bj.bcebos.com/eng_models/baidu_en8k_v1.8_to_v2.x.tar.gz'
MD5=fdabeb6c96963ac85d9188f0275c6a1b MD5=fdabeb6c96963ac85d9188f0275c6a1b
TARGET=./baidu_en8k_v1.8_to_v2.x.tar.gz TARGET=${ckpt_dir}/baidu_en8k_v1.8_to_v2.x.tar.gz
echo "Download BaiduEn8k model ..." echo "Download BaiduEn8k model ..."
@ -13,7 +20,6 @@ if [ $? -ne 0 ]; then
echo "Fail to download BaiduEn8k model!" echo "Fail to download BaiduEn8k model!"
exit 1 exit 1
fi fi
tar -zxvf $TARGET
exit 0 exit 0

@ -12,5 +12,5 @@ export PYTHONPATH=${LOCAL_DEEPSPEECH2}:${PYTHONPATH}
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib/ export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib/
MODEL=deepspeech2 MODEL=deepspeech2
export BIN_DIR=${LOCAL_DEEPSPEECH2}/deepspeech2x/bin export BIN_DIR=${LOCAL_DEEPSPEECH2}/src_deepspeech2x/bin
echo "BIN_DIR "${BIN_DIR} echo "BIN_DIR "${BIN_DIR}

@ -17,7 +17,7 @@ echo "checkpoint name ${ckpt}"
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
# prepare data # prepare data
mkdir -p exp/${ckpt}/checkpoints mkdir -p exp/${ckpt}/checkpoints
bash ./local/data.sh || exit -1 bash ./local/data.sh exp/${ckpt}/checkpoints || exit -1
fi fi
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then

@ -2,3 +2,4 @@ exp
data data
*log *log
tmp tmp
nohup*

@ -1,7 +1,15 @@
#!/bin/bash #!/bin/bash
if [ $# != 1 ];then
echo "usage: ${0} ckpt_dir"
exit -1
fi
ckpt_dir=$1
stage=-1 stage=-1
stop_stage=100 stop_stage=100
unit_type=char
source ${MAIN_ROOT}/utils/parse_options.sh source ${MAIN_ROOT}/utils/parse_options.sh
@ -9,18 +17,16 @@ mkdir -p data
TARGET_DIR=${MAIN_ROOT}/examples/dataset TARGET_DIR=${MAIN_ROOT}/examples/dataset
mkdir -p ${TARGET_DIR} mkdir -p ${TARGET_DIR}
bash local/download_model.sh ${ckpt_dir}
bash local/download_model.sh
if [ $? -ne 0 ]; then if [ $? -ne 0 ]; then
exit 1 exit 1
fi fi
cd ${ckpt_dir}
tar xzvf librispeech_v1.8_to_v2.x.tar.gz tar xzvf librispeech_v1.8_to_v2.x.tar.gz
mv librispeech_v1.8.pdparams exp/deepspeech2/checkpoints/ cd -
mv README.md exp/deepspeech2/ mv ${ckpt_dir}/mean_std.npz data/
mv mean_std.npz data/ mv ${ckpt_dir}/vocab.txt data/
mv vocab.txt data/
rm librispeech_v1.8_to_v2.x.tar.gz -f
if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
# download data, generate manifests # download data, generate manifests
@ -52,36 +58,13 @@ if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
done done
fi fi
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
# compute mean and stddev for normalizer
num_workers=$(nproc)
python3 ${MAIN_ROOT}/utils/compute_mean_std.py \
--manifest_path="data/manifest.train.raw" \
--num_samples=2000 \
--specgram_type="linear" \
--delta_delta=false \
--sample_rate=16000 \
--stride_ms=10.0 \
--window_ms=20.0 \
--use_dB_normalization=True \
--num_workers=${num_workers} \
--output_path="data/mean_std.json"
if [ $? -ne 0 ]; then
echo "Compute mean and stddev failed. Terminated."
exit 1
fi
fi
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
# format manifest with tokenids, vocab size # format manifest with tokenids, vocab size
for set in train dev test dev-clean dev-other test-clean test-other; do for set in train dev test dev-clean dev-other test-clean test-other; do
{ {
python3 ${MAIN_ROOT}/utils/format_data.py \ python3 ${MAIN_ROOT}/utils/format_data.py \
--feat_type "raw" \ --feat_type "raw" \
--cmvn_path "data/mean_std.json" \ --cmvn_path "data/mean_std.npz" \
--unit_type ${unit_type} \ --unit_type ${unit_type} \
--vocab_path="data/vocab.txt" \ --vocab_path="data/vocab.txt" \
--manifest_path="data/manifest.${set}.raw" \ --manifest_path="data/manifest.${set}.raw" \

@ -1,10 +1,17 @@
#! /usr/bin/env bash #! /usr/bin/env bash
if [ $# != 1 ];then
echo "usage: ${0} ckpt_dir"
exit -1
fi
ckpt_dir=$1
. ${MAIN_ROOT}/utils/utility.sh . ${MAIN_ROOT}/utils/utility.sh
URL='https://deepspeech.bj.bcebos.com/eng_models/librispeech_v1.8_to_v2.x.tar.gz' URL='https://deepspeech.bj.bcebos.com/eng_models/librispeech_v1.8_to_v2.x.tar.gz'
MD5=7b0f582fe2f5a840b840e7ee52246bc5 MD5=7b0f582fe2f5a840b840e7ee52246bc5
TARGET=./librispeech_v1.8_to_v2.x.tar.gz TARGET=${ckpt_dir}/librispeech_v1.8_to_v2.x.tar.gz
echo "Download LibriSpeech model ..." echo "Download LibriSpeech model ..."
@ -13,7 +20,6 @@ if [ $? -ne 0 ]; then
echo "Fail to download LibriSpeech model!" echo "Fail to download LibriSpeech model!"
exit 1 exit 1
fi fi
tar -zxvf $TARGET
exit 0 exit 0

@ -12,5 +12,5 @@ export PYTHONPATH=${LOCAL_DEEPSPEECH2}:${PYTHONPATH}
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib/ export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib/
MODEL=deepspeech2 MODEL=deepspeech2
export BIN_DIR=${LOCAL_DEEPSPEECH2}/deepspeech2x/bin export BIN_DIR=${LOCAL_DEEPSPEECH2}/src_deepspeech2x/bin
echo "BIN_DIR "${BIN_DIR} echo "BIN_DIR "${BIN_DIR}

@ -17,10 +17,10 @@ echo "checkpoint name ${ckpt}"
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
# prepare data # prepare data
mkdir -p exp/${ckpt}/checkpoints mkdir -p exp/${ckpt}/checkpoints
bash ./local/data.sh || exit -1 bash ./local/data.sh exp/${ckpt}/checkpoints || exit -1
fi fi
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
# test ckpt avg_n # test ckpt avg_n
CUDA_VISIBLE_DEVICES=0 ./local/test.sh ${conf_path} exp/${ckpt}/checkpoints/${v18_ckpt} ${model_type}|| exit -1 CUDA_VISIBLE_DEVICES=1 ./local/test.sh ${conf_path} exp/${ckpt}/checkpoints/${v18_ckpt} ${model_type}|| exit -1
fi fi

@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""Evaluation for DeepSpeech2 model.""" """Evaluation for DeepSpeech2 model."""
from deepspeech2x.model import DeepSpeech2Tester as Tester from src_deepspeech2x.test_model import DeepSpeech2Tester as Tester
from deepspeech.exps.deepspeech2.config import get_cfg_defaults from deepspeech.exps.deepspeech2.config import get_cfg_defaults
from deepspeech.training.cli import default_argument_parser from deepspeech.training.cli import default_argument_parser

@ -15,8 +15,8 @@
from typing import Optional from typing import Optional
import paddle import paddle
from deepspeech2x.models.ds2.rnn import RNNStack
from paddle import nn from paddle import nn
from src_deepspeech2x.models.ds2.rnn import RNNStack
from yacs.config import CfgNode from yacs.config import CfgNode
from deepspeech.models.ds2.conv import ConvStack from deepspeech.models.ds2.conv import ConvStack

@ -20,10 +20,10 @@ from typing import Optional
import numpy as np import numpy as np
import paddle import paddle
from deepspeech2x.models.ds2 import DeepSpeech2InferModel
from deepspeech2x.models.ds2 import DeepSpeech2Model
from paddle import distributed as dist from paddle import distributed as dist
from paddle.io import DataLoader from paddle.io import DataLoader
from src_deepspeech2x.models.ds2 import DeepSpeech2InferModel
from src_deepspeech2x.models.ds2 import DeepSpeech2Model
from yacs.config import CfgNode from yacs.config import CfgNode
from deepspeech.io.collator import SpeechCollator from deepspeech.io.collator import SpeechCollator

@ -53,7 +53,8 @@ def main():
fout = open(args.output_path, 'w', encoding='utf-8') fout = open(args.output_path, 'w', encoding='utf-8')
# get feat dim # get feat dim
mean, std = load_cmvn(args.cmvn_path, filetype='json') filetype = args.cmvn_path.split(".")[-1]
mean, istd = load_cmvn(args.cmvn_path, filetype=filetype)
feat_dim = mean.shape[0] #(D) feat_dim = mean.shape[0] #(D)
print(f"Feature dim: {feat_dim}") print(f"Feature dim: {feat_dim}")

Loading…
Cancel
Save