fix libri ds2 scripts; add ngram and spm doc

pull/622/head
Hui Zhang 4 years ago
parent 9f907b9bad
commit c5d85a936c

@ -10,9 +10,9 @@ data:
min_input_len: 0.0 min_input_len: 0.0
max_input_len: 27.0 # second max_input_len: 27.0 # second
min_output_len: 0.0 min_output_len: 0.0
max_output_len: 400.0 max_output_len: .inf
min_output_input_ratio: 0.05 min_output_input_ratio: 0.00
max_output_input_ratio: 10.0 max_output_input_ratio: .inf
specgram_type: linear specgram_type: linear
target_sample_rate: 16000 target_sample_rate: 16000
max_freq: None max_freq: None
@ -41,7 +41,7 @@ training:
lr: 1e-3 lr: 1e-3
lr_decay: 0.83 lr_decay: 0.83
weight_decay: 1e-06 weight_decay: 1e-06
global_grad_clip: 5.0 global_grad_clip: 3.0
log_interval: 100 log_interval: 100
decoding: decoding:

@ -61,13 +61,13 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
num_workers=$(nproc) num_workers=$(nproc)
python3 ${MAIN_ROOT}/utils/compute_mean_std.py \ python3 ${MAIN_ROOT}/utils/compute_mean_std.py \
--manifest_path="data/manifest.train.raw" \ --manifest_path="data/manifest.train.raw" \
--num_samples=-1 \ --num_samples=2000 \
--specgram_type="linear" \ --specgram_type="linear" \
--delta_delta=false \ --delta_delta=false \
--sample_rate=16000 \ --sample_rate=16000 \
--stride_ms=10.0 \ --stride_ms=10.0 \
--window_ms=20.0 \ --window_ms=20.0 \
--use_dB_normalization=False \ --use_dB_normalization=True \
--num_workers=${num_workers} \ --num_workers=${num_workers} \
--output_path="data/mean_std.json" --output_path="data/mean_std.json"

@ -0,0 +1,7 @@
# Ngram LM
Train a Chinese character n-gram LM with [kenlm](https://github.com/kpu/kenlm).
```
bash run.sh
```

@ -1,4 +1,6 @@
# SPM demo # [SentencePiece Model](https://github.com/google/sentencepiece)
Train an `spm` model for an English tokenizer.
``` ```
bash run.sh bash run.sh

Loading…
Cancel
Save