refactor data pipe; fix conf; fix u2 default params

pull/578/head
Hui Zhang 4 years ago
parent c4df6bac32
commit 88d82b4ab8

@ -18,6 +18,7 @@ from deepspeech.exps.u2.model import U2Trainer
from deepspeech.io.dataset import ManifestDataset from deepspeech.io.dataset import ManifestDataset
from deepspeech.models.u2 import U2Model from deepspeech.models.u2 import U2Model
_C = CfgNode() _C = CfgNode()
_C.data = ManifestDataset.params() _C.data = ManifestDataset.params()

@ -148,6 +148,8 @@ class FeatureNormalizer(object):
batch_size=64, batch_size=64,
eps=1e-20): eps=1e-20):
"""Compute mean and std from randomly sampled instances.""" """Compute mean and std from randomly sampled instances."""
paddle.set_device('cpu')
collate_func = CollateFunc(featurize_func) collate_func = CollateFunc(featurize_func)
dataset = AudioDataset(manifest_path, num_samples, self._rng) dataset = AudioDataset(manifest_path, num_samples, self._rng)
data_loader = DataLoader( data_loader = DataLoader(

@ -61,12 +61,14 @@ class U2BaseModel(nn.Module):
def params(cls, config: Optional[CfgNode]=None) -> CfgNode: def params(cls, config: Optional[CfgNode]=None) -> CfgNode:
# network architecture # network architecture
default = CfgNode() default = CfgNode()
# allow add new item when merge_with_file
default.set_new_allowed(True)
default.cmvn_file = "" default.cmvn_file = ""
default.cmvn_file_type = "npz" default.cmvn_file_type = "npz"
default.input_dim = 0 default.input_dim = 0
default.output_dim = 0 default.output_dim = 0
# encoder related # encoder related
default.encoder = 'conformer' default.encoder = 'transformer'
default.encoder_conf = CfgNode( default.encoder_conf = CfgNode(
dict( dict(
output_size=256, # dimension of attention output_size=256, # dimension of attention
@ -78,11 +80,12 @@ class U2BaseModel(nn.Module):
attention_dropout_rate=0.0, attention_dropout_rate=0.0,
input_layer='conv2d', # encoder input type, you can choose conv2d, conv2d6 and conv2d8 input_layer='conv2d', # encoder input type, you can choose conv2d, conv2d6 and conv2d8
normalize_before=True, normalize_before=True,
cnn_module_kernel=15, # use_cnn_module=True,
use_cnn_module=True, # cnn_module_kernel=15,
activation_type='swish', # activation_type='swish',
pos_enc_layer_type='rel_pos', # pos_enc_layer_type='rel_pos',
selfattention_layer_type='rel_selfattn', )) # selfattention_layer_type='rel_selfattn',
))
# decoder related # decoder related
default.decoder = 'transformer' default.decoder = 'transformer'
default.decoder_conf = CfgNode( default.decoder_conf = CfgNode(

@ -1,11 +1,11 @@
# https://yaml.org/type/float.html # https://yaml.org/type/float.html
data: data:
train_manifest: data/manifest.tiny train_manifest: data/manifest.train
dev_manifest: data/manifest.tiny dev_manifest: data/manifest.dev
test_manifest: data/manifest.tiny test_manifest: data/manifest.test
vocab_filepath: data/vocab.txt vocab_filepath: data/vocab.txt
unit_type: 'spm' unit_type: 'spm'
spm_model_prefix: 'data/bpe_unigram_200' spm_model_prefix: 'data/bpe_unigram_5000'
mean_std_filepath: "" mean_std_filepath: ""
augmentation_config: conf/augmentation.json augmentation_config: conf/augmentation.json
batch_size: 4 batch_size: 4

@ -1,11 +1,11 @@
# https://yaml.org/type/float.html # https://yaml.org/type/float.html
data: data:
train_manifest: data/manifest.tiny train_manifest: data/manifest.train
dev_manifest: data/manifest.tiny dev_manifest: data/manifest.dev
test_manifest: data/manifest.tiny test_manifest: data/manifest.test
vocab_filepath: data/vocab.txt vocab_filepath: data/vocab.txt
unit_type: 'spm' unit_type: 'spm'
spm_model_prefix: 'data/bpe_unigram_200' spm_model_prefix: 'data/bpe_unigram_5000'
mean_std_filepath: "" mean_std_filepath: ""
augmentation_config: conf/augmentation.json augmentation_config: conf/augmentation.json
batch_size: 64 batch_size: 64

@ -1,8 +1,8 @@
# https://yaml.org/type/float.html # https://yaml.org/type/float.html
data: data:
train_manifest: data/manifest.tiny train_manifest: data/manifest.train
dev_manifest: data/manifest.tiny dev_manifest: data/manifest.dev
test_manifest: data/manifest.tiny test_manifest: data/manifest.test
vocab_filepath: data/vocab.txt vocab_filepath: data/vocab.txt
unit_type: 'spm' unit_type: 'spm'
spm_model_prefix: 'data/bpe_unigram_5000' spm_model_prefix: 'data/bpe_unigram_5000'

@ -1,11 +1,11 @@
# https://yaml.org/type/float.html # https://yaml.org/type/float.html
data: data:
train_manifest: data/manifest.tiny train_manifest: data/manifest.train
dev_manifest: data/manifest.tiny dev_manifest: data/manifest.dev
test_manifest: data/manifest.tiny test_manifest: data/manifest.test
vocab_filepath: data/vocab.txt vocab_filepath: data/vocab.txt
unit_type: 'spm' unit_type: 'spm'
spm_model_prefix: 'data/bpe_unigram_200' spm_model_prefix: 'data/bpe_unigram_5000'
mean_std_filepath: "" mean_std_filepath: ""
augmentation_config: conf/augmentation.json augmentation_config: conf/augmentation.json
batch_size: 64 batch_size: 64

@ -27,8 +27,20 @@ if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
exit 1 exit 1
fi fi
for set in train-clean-100 train-clean-360 train-other-500 dev-clean dev-other test-clean test-other; do
mv data/manifest.${set} data/manifest.${set}.raw
done
for set in train-clean-100 train-clean-360 train-other-500; do for set in train-clean-100 train-clean-360 train-other-500; do
cat data/manifest.${set} >> data/manifest.train.raw cat data/manifest.${set}.raw >> data/manifest.train.raw
done
for set in dev-clean dev-other; do
cat data/manifest.${set}.raw >> data/manifest.dev.raw
done
for set in test-clean test-other; do
cat data/manifest.${set}.raw >> data/manifest.test.raw
done done
fi fi
@ -73,20 +85,24 @@ fi
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
# format manifest with tokenids, vocab size # format manifest with tokenids, vocab size
for set in train dev test dev-clean dev-other test-clean test-other; do
{
python3 ${MAIN_ROOT}/utils/format_data.py \ python3 ${MAIN_ROOT}/utils/format_data.py \
--feat_type "raw" \ --feat_type "raw" \
--cmvn_path "data/mean_std.json" \ --cmvn_path "data/mean_std.json" \
--unit_type "spm" \ --unit_type "spm" \
--spm_model_prefix ${bpeprefix} \ --spm_model_prefix ${bpeprefix} \
--vocab_path="data/vocab.txt" \ --vocab_path="data/vocab.txt" \
--manifest_path="data/manifest.train.raw" \ --manifest_path="data/manifest.${set}.raw" \
--output_path="data/manifest.train" --output_path="data/manifest.${set}"
if [ $? -ne 0 ]; then if [ $? -ne 0 ]; then
echo "Formt mnaifest failed. Terminated." echo "Formt mnaifest failed. Terminated."
exit 1 exit 1
fi fi
}&
done
wait
fi fi
echo "LibriSpeech Data preparation done." echo "LibriSpeech Data preparation done."

@ -4,7 +4,8 @@ source path.sh
stage=0 stage=0
stop_stage=100 stop_stage=100
ckpt=conformer conf_path=conf/transformer.yaml
ckpt=$(basename ${conf_path} | awk -F'.' '{print $1}')
avg_num=30 avg_num=30
avg_ckpt=avg_${avg_num} avg_ckpt=avg_${avg_num}
@ -17,7 +18,7 @@ fi
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
# train model, all `ckpt` under `exp` dir # train model, all `ckpt` under `exp` dir
CUDA_VISIBLE_DEVICES=4,5,6,7 ./local/train.sh conf/conformer.yaml ${ckpt} CUDA_VISIBLE_DEVICES=4,5,6,7 ./local/train.sh ${conf_path} ${ckpt}
fi fi
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
@ -27,10 +28,10 @@ fi
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
# test ckpt avg_n # test ckpt avg_n
CUDA_VISIBLE_DEVICES=7 ./local/test.sh conf/conformer.yaml exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1 CUDA_VISIBLE_DEVICES=7 ./local/test.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
fi fi
if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
# export ckpt avg_n # export ckpt avg_n
CUDA_VISIBLE_DEVICES= ./local/export.sh conf/conformer.yaml exp/${ckpt}/checkpoints/${avg_ckpt} exp/${ckpt}/checkpoints/${avg_ckpt}.jit CUDA_VISIBLE_DEVICES= ./local/export.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} exp/${ckpt}/checkpoints/${avg_ckpt}.jit
fi fi

@ -4,7 +4,8 @@ source path.sh
stage=0 stage=0
stop_stage=100 stop_stage=100
ckpt=conformer conf_path=conf/transformer.yaml
ckpt=$(basename ${conf_path} | awk -F'.' '{print $1}')
avg_num=1 avg_num=1
avg_ckpt=avg_${avg_num} avg_ckpt=avg_${avg_num}
@ -17,7 +18,7 @@ fi
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
# train model, all `ckpt` under `exp` dir # train model, all `ckpt` under `exp` dir
CUDA_VISIBLE_DEVICES=0 ./local/train.sh conf/conformer.yaml ${ckpt} CUDA_VISIBLE_DEVICES=4,5,6,7 ./local/train.sh ${conf_path} ${ckpt}
fi fi
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
@ -27,10 +28,10 @@ fi
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
# test ckpt avg_n # test ckpt avg_n
CUDA_VISIBLE_DEVICES=0 ./local/test.sh conf/conformer.yaml exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1 CUDA_VISIBLE_DEVICES=7 ./local/test.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
fi fi
if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
# export ckpt avg_n # export ckpt avg_n
CUDA_VISIBLE_DEVICES= ./local/export.sh conf/conformer.yaml exp/${ckpt}/checkpoints/${avg_ckpt} exp/${ckpt}/checkpoints/${avg_ckpt}.jit CUDA_VISIBLE_DEVICES= ./local/export.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} exp/${ckpt}/checkpoints/${avg_ckpt}.jit
fi fi

Loading…
Cancel
Save