fix libri ds2 scripts; add ngram and spm doc

4 years ago · c5d85a936c
parent 9f907b9bad
commit c5d85a936c
4 changed files with 21 additions and 12 deletions
--- a/examples/librispeech/s0/conf/deepspeech2.yaml
+++ b/examples/librispeech/s0/conf/deepspeech2.yaml
@ -10,9 +10,9 @@ data:
  min_input_len: 0.0
  max_input_len: 27.0 # second
  min_output_len: 0.0
-  max_output_len: 400.0
+  max_output_len: .inf
-  min_output_input_ratio: 0.05
+  min_output_input_ratio: 0.00
-  max_output_input_ratio: 10.0
+  max_output_input_ratio: .inf
  specgram_type: linear
  target_sample_rate: 16000
  max_freq: None
@ -41,7 +41,7 @@ training:
  lr: 1e-3
  lr_decay: 0.83
  weight_decay: 1e-06
-  global_grad_clip: 5.0
+  global_grad_clip: 3.0
  log_interval: 100
 decoding:
--- a/examples/librispeech/s0/local/data.sh
+++ b/examples/librispeech/s0/local/data.sh
@ -61,13 +61,13 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    num_workers=$(nproc)
    python3 ${MAIN_ROOT}/utils/compute_mean_std.py \
    --manifest_path="data/manifest.train.raw" \
-    --num_samples=-1 \
+    --num_samples=2000 \
    --specgram_type="linear" \
    --delta_delta=false \
    --sample_rate=16000 \
    --stride_ms=10.0 \
    --window_ms=20.0 \
-    --use_dB_normalization=False \
+    --use_dB_normalization=True \
    --num_workers=${num_workers} \
    --output_path="data/mean_std.json"
--- a/examples/ngram_lm/README.md
+++ b/examples/ngram_lm/README.md
@ -0,0 +1,7 @@
 # Ngram LM
 Train a Chinese character n-gram LM with [kenlm](https://github.com/kpu/kenlm).
 ```
 bash run.sh
 ```
--- a/examples/spm/README.md
+++ b/examples/spm/README.md
@ -1,4 +1,6 @@
-# SPM demo
+# [SentencePiece Model](https://github.com/google/sentencepiece)
 Train an `spm` model for an English tokenizer.
 ```
 bash run.sh