fix libri ds2 scripts; add ngram and spm doc

pull/622/head
Hui Zhang 4 years ago
parent 9f907b9bad
commit c5d85a936c

@ -10,9 +10,9 @@ data:
min_input_len: 0.0 min_input_len: 0.0
max_input_len: 27.0 # second max_input_len: 27.0 # second
min_output_len: 0.0 min_output_len: 0.0
max_output_len: 400.0 max_output_len: .inf
min_output_input_ratio: 0.05 min_output_input_ratio: 0.00
max_output_input_ratio: 10.0 max_output_input_ratio: .inf
specgram_type: linear specgram_type: linear
target_sample_rate: 16000 target_sample_rate: 16000
max_freq: None max_freq: None
@ -21,7 +21,7 @@ data:
window_ms: 20.0 window_ms: 20.0
delta_delta: False delta_delta: False
dither: 1.0 dither: 1.0
use_dB_normalization: True use_dB_normalization: True
target_dB: -20 target_dB: -20
random_seed: 0 random_seed: 0
keep_transcription_text: False keep_transcription_text: False
@ -41,7 +41,7 @@ training:
lr: 1e-3 lr: 1e-3
lr_decay: 0.83 lr_decay: 0.83
weight_decay: 1e-06 weight_decay: 1e-06
global_grad_clip: 5.0 global_grad_clip: 3.0
log_interval: 100 log_interval: 100
decoding: decoding:

@ -17,12 +17,12 @@ if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
--manifest_prefix="data/manifest" \ --manifest_prefix="data/manifest" \
--target_dir="${TARGET_DIR}/librispeech" \ --target_dir="${TARGET_DIR}/librispeech" \
--full_download="True" --full_download="True"
if [ $? -ne 0 ]; then if [ $? -ne 0 ]; then
echo "Prepare LibriSpeech failed. Terminated." echo "Prepare LibriSpeech failed. Terminated."
exit 1 exit 1
fi fi
for set in train-clean-100 train-clean-360 train-other-500 dev-clean dev-other test-clean test-other; do for set in train-clean-100 train-clean-360 train-other-500 dev-clean dev-other test-clean test-other; do
mv data/manifest.${set} data/manifest.${set}.raw mv data/manifest.${set} data/manifest.${set}.raw
done done
@ -48,7 +48,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
--count_threshold=0 \ --count_threshold=0 \
--vocab_path="data/vocab.txt" \ --vocab_path="data/vocab.txt" \
--manifest_paths="data/manifest.train.raw" --manifest_paths="data/manifest.train.raw"
if [ $? -ne 0 ]; then if [ $? -ne 0 ]; then
echo "Build vocabulary failed. Terminated." echo "Build vocabulary failed. Terminated."
exit 1 exit 1
@ -61,16 +61,16 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
num_workers=$(nproc) num_workers=$(nproc)
python3 ${MAIN_ROOT}/utils/compute_mean_std.py \ python3 ${MAIN_ROOT}/utils/compute_mean_std.py \
--manifest_path="data/manifest.train.raw" \ --manifest_path="data/manifest.train.raw" \
--num_samples=-1 \ --num_samples=2000 \
--specgram_type="linear" \ --specgram_type="linear" \
--delta_delta=false \ --delta_delta=false \
--sample_rate=16000 \ --sample_rate=16000 \
--stride_ms=10.0 \ --stride_ms=10.0 \
--window_ms=20.0 \ --window_ms=20.0 \
--use_dB_normalization=False \ --use_dB_normalization=True \
--num_workers=${num_workers} \ --num_workers=${num_workers} \
--output_path="data/mean_std.json" --output_path="data/mean_std.json"
if [ $? -ne 0 ]; then if [ $? -ne 0 ]; then
echo "Compute mean and stddev failed. Terminated." echo "Compute mean and stddev failed. Terminated."
exit 1 exit 1

@ -0,0 +1,7 @@
# Ngram LM
Train a Chinese character n-gram language model with [kenlm](https://github.com/kpu/kenlm).
```
bash run.sh
```

@ -1,4 +1,6 @@
# SPM demo # [SentencePiece Model](https://github.com/google/sentencepiece)
Train an `spm` model to use as an English tokenizer.
``` ```
bash run.sh bash run.sh

Loading…
Cancel
Save