From e66da76db9b24fcfd58e2578052cad5fb896a629 Mon Sep 17 00:00:00 2001
From: huangyuxin <hyxin2014@126.com>
Date: Fri, 29 Oct 2021 10:16:09 +0000
Subject: [PATCH] fix the bug of choosing dataloader, remove the log of
 downloading lm, change the epoch in tiny

---
 README.md                                      | 18 +++++++++---------
 deepspeech/exps/deepspeech2/model.py           |  9 ++++++---
 examples/aishell/s0/local/test.sh              |  2 +-
 examples/aishell/s0/local/test_export.sh       |  2 +-
 examples/librispeech/s0/local/test.sh          |  2 +-
 examples/other/1xt2x/aishell/local/test.sh     |  2 +-
 examples/other/1xt2x/baidu_en8k/local/test.sh  |  2 +-
 examples/other/1xt2x/librispeech/local/test.sh |  2 +-
 examples/tiny/s0/conf/deepspeech2.yaml         |  2 +-
 examples/tiny/s0/conf/deepspeech2_online.yaml  |  2 +-
 examples/tiny/s0/local/test.sh                 |  2 +-
 examples/tiny/s1/conf/chunk_confermer.yaml     |  2 +-
 examples/tiny/s1/conf/chunk_transformer.yaml   |  2 +-
 examples/tiny/s1/conf/conformer.yaml           |  2 +-
 examples/tiny/s1/conf/transformer.yaml         |  2 +-
 15 files changed, 28 insertions(+), 25 deletions(-)

diff --git a/README.md b/README.md
index 7060a6556..8a83ac619 100644
--- a/README.md
+++ b/README.md
@@ -9,20 +9,20 @@ English | [简体中文](README_ch.md)
 </p>
 <div align="center">  
 
-  <h3> 
+  <h3>
   <a href="#quick-start"> Quick Start </a>
   | <a href="#tutorials"> Tutorials </a>
-  | <a href="#model-list"> Models List </a> 
+  | <a href="#model-list"> Models List </a>
 </div>
-  
+
 ------------------------------------------------------------------------------------
 ![License](https://img.shields.io/badge/license-Apache%202-red.svg)
 ![python version](https://img.shields.io/badge/python-3.7+-orange.svg)
 ![support os](https://img.shields.io/badge/os-linux-yellow.svg)
 
 <!---
-why they should use your module, 
-how they can install it, 
+why they should use your module,
+how they can install it,
 how they can use it
 -->
 
@@ -31,7 +31,7 @@ how they can use it
 Via the easy-to-use, efficient, flexible and scalable implementation, our vision is to empower both industrial application and academic research, including training, inference & testing modules, and deployment process. To be more specific, this toolkit features at:
 - **Fast and Light-weight**: we provide high-speed and ultra-lightweight models that are convenient for industrial deployment.
 - **Rule-based Chinese frontend**: our frontend contains Text Normalization (TN) and Grapheme-to-Phoneme (G2P, including Polyphone and Tone Sandhi). Moreover, we use self-defined linguistic rules to adapt Chinese context.
-- **Varieties of Functions that Vitalize both Industrial and Academia**: 
+- **Varieties of Functions that Vitalize both Industrial and Academia**:
   - *Implementation of critical audio tasks*: this toolkit contains audio functions like Speech Translation (ST), Automatic Speech Recognition (ASR), Text-To-Speech Synthesis (TTS), Voice Cloning(VC), Punctuation Restoration, etc.
   - *Integration of mainstream models and datasets*: the toolkit implements modules that participate in the whole pipeline of the speech tasks, and uses mainstream datasets like LibriSpeech, LJSpeech, AIShell, CSMSC, etc. See also [model lists](#models-list) for more details.
   - *Cross-domain application*: as an extension of the application of traditional audio tasks, we combine the aforementioned tasks with other fields like NLP.
@@ -70,7 +70,7 @@ If you want to set up PaddleSpeech in other environment, please see the [ASR ins
 ## Quick Start
 > Note: the current links to `English ASR` and `English TTS` are not valid.
 
-Just a quick test of our functions: [English ASR](link/hubdetail?name=deepspeech2_aishell&en_category=AutomaticSpeechRecognition) and [English TTS](link/hubdetail?name=fastspeech2_baker&en_category=TextToSpeech) by typing message or upload your own audio file. 
+Just a quick test of our functions: [English ASR](link/hubdetail?name=deepspeech2_aishell&en_category=AutomaticSpeechRecognition) and [English TTS](link/hubdetail?name=fastspeech2_baker&en_category=TextToSpeech) by typing message or upload your own audio file.
 
 Developers can have a try of our model with only a few lines of code.
 
@@ -87,7 +87,7 @@ bash local/test.sh conf/deepspeech2.yaml ckptfile offline
 ```
 
 For *TTS*, try FastSpeech2 on LJSpeech:
-- Download LJSpeech-1.1 from the [ljspeech official website](https://keithito.com/LJ-Speech-Dataset/), our prepared durations for fastspeech2 [ljspeech_alignment](https://paddlespeech.bj.bcebos.com/MFA/LJSpeech-1.1/ljspeech_alignment.tar.gz). 
+- Download LJSpeech-1.1 from the [ljspeech official website](https://keithito.com/LJ-Speech-Dataset/), our prepared durations for fastspeech2 [ljspeech_alignment](https://paddlespeech.bj.bcebos.com/MFA/LJSpeech-1.1/ljspeech_alignment.tar.gz).
 - The pretrained models are seperated into two parts: [fastspeech2_nosil_ljspeech_ckpt](https://paddlespeech.bj.bcebos.com/Parakeet/fastspeech2_nosil_ljspeech_ckpt_0.5.zip) and [pwg_ljspeech_ckpt](https://paddlespeech.bj.bcebos.com/Parakeet/pwg_ljspeech_ckpt_0.5.zip). Please download then unzip to `./model/fastspeech2` and `./model/pwg` respectively.
 - Assume your path to the dataset is `~/datasets/LJSpeech-1.1` and `./ljspeech_alignment` accordingly, preprocess your data and then use our pretrained model to synthesize:
 ```shell
@@ -106,7 +106,7 @@ PaddleSpeech supports a series of most popular models, summarized in [released m
 ASR module contains *Acoustic Model* and *Language Model*, with the following details:
 
 <!---
-The current hyperlinks redirect to [Previous Parakeet](https://github.com/PaddlePaddle/Parakeet/tree/develop/examples). 
+The current hyperlinks redirect to [Previous Parakeet](https://github.com/PaddlePaddle/Parakeet/tree/develop/examples).
 -->
 
 > Note: The `Link` should be code path rather than download links.
diff --git a/deepspeech/exps/deepspeech2/model.py b/deepspeech/exps/deepspeech2/model.py
index 6424cfdf3..710630a78 100644
--- a/deepspeech/exps/deepspeech2/model.py
+++ b/deepspeech/exps/deepspeech2/model.py
@@ -153,8 +153,12 @@ class DeepSpeech2Trainer(Trainer):
     def setup_model(self):
         config = self.config.clone()
         with UpdateConfig(config):
-            config.model.feat_size = self.train_loader.collate_fn.feature_size
-            config.model.dict_size = self.train_loader.collate_fn.vocab_size
+            if self.train:
+                config.model.feat_size = self.train_loader.collate_fn.feature_size
+                config.model.dict_size = self.train_loader.collate_fn.vocab_size
+            else:
+                config.model.feat_size = self.test_loader.collate_fn.feature_size
+                config.model.dict_size = self.test_loader.collate_fn.vocab_size
 
         if self.args.model_type == 'offline':
             model = DeepSpeech2Model.from_config(config.model)
@@ -189,7 +193,6 @@ class DeepSpeech2Trainer(Trainer):
         self.lr_scheduler = lr_scheduler
         logger.info("Setup optimizer/lr_scheduler!")
 
-
     def setup_dataloader(self):
         config = self.config.clone()
         config.defrost()
diff --git a/examples/aishell/s0/local/test.sh b/examples/aishell/s0/local/test.sh
index 2ae0740b3..64d725030 100755
--- a/examples/aishell/s0/local/test.sh
+++ b/examples/aishell/s0/local/test.sh
@@ -13,7 +13,7 @@ ckpt_prefix=$2
 model_type=$3
 
 # download language model
-bash local/download_lm_ch.sh
+bash local/download_lm_ch.sh > /dev/null 2>&1
 if [ $? -ne 0 ]; then
    exit 1
 fi
diff --git a/examples/aishell/s0/local/test_export.sh b/examples/aishell/s0/local/test_export.sh
index a9a6b122d..71469753d 100755
--- a/examples/aishell/s0/local/test_export.sh
+++ b/examples/aishell/s0/local/test_export.sh
@@ -13,7 +13,7 @@ jit_model_export_path=$2
 model_type=$3
 
 # download language model
-bash local/download_lm_ch.sh
+bash local/download_lm_ch.sh > /dev/null 2>&1
 if [ $? -ne 0 ]; then
    exit 1
 fi
diff --git a/examples/librispeech/s0/local/test.sh b/examples/librispeech/s0/local/test.sh
index 4d00f30b8..25dd04374 100755
--- a/examples/librispeech/s0/local/test.sh
+++ b/examples/librispeech/s0/local/test.sh
@@ -13,7 +13,7 @@ ckpt_prefix=$2
 model_type=$3
 
 # download language model
-bash local/download_lm_en.sh
+bash local/download_lm_en.sh > /dev/null 2>&1
 if [ $? -ne 0 ]; then
    exit 1
 fi
diff --git a/examples/other/1xt2x/aishell/local/test.sh b/examples/other/1xt2x/aishell/local/test.sh
index 2ae0740b3..d539ac494 100755
--- a/examples/other/1xt2x/aishell/local/test.sh
+++ b/examples/other/1xt2x/aishell/local/test.sh
@@ -13,7 +13,7 @@ ckpt_prefix=$2
 model_type=$3
 
 # download language model
-bash local/download_lm_ch.sh
+bash local/download_lm_ch.sh > /dev/null 2>&1
 if [ $? -ne 0 ]; then
    exit 1
 fi
diff --git a/examples/other/1xt2x/baidu_en8k/local/test.sh b/examples/other/1xt2x/baidu_en8k/local/test.sh
index 4d00f30b8..25dd04374 100755
--- a/examples/other/1xt2x/baidu_en8k/local/test.sh
+++ b/examples/other/1xt2x/baidu_en8k/local/test.sh
@@ -13,7 +13,7 @@ ckpt_prefix=$2
 model_type=$3
 
 # download language model
-bash local/download_lm_en.sh
+bash local/download_lm_en.sh > /dev/null 2>&1
 if [ $? -ne 0 ]; then
    exit 1
 fi
diff --git a/examples/other/1xt2x/librispeech/local/test.sh b/examples/other/1xt2x/librispeech/local/test.sh
index 4d00f30b8..25dd04374 100755
--- a/examples/other/1xt2x/librispeech/local/test.sh
+++ b/examples/other/1xt2x/librispeech/local/test.sh
@@ -13,7 +13,7 @@ ckpt_prefix=$2
 model_type=$3
 
 # download language model
-bash local/download_lm_en.sh
+bash local/download_lm_en.sh > /dev/null 2>&1
 if [ $? -ne 0 ]; then
    exit 1
 fi
diff --git a/examples/tiny/s0/conf/deepspeech2.yaml b/examples/tiny/s0/conf/deepspeech2.yaml
index 621b372cb..58899a156 100644
--- a/examples/tiny/s0/conf/deepspeech2.yaml
+++ b/examples/tiny/s0/conf/deepspeech2.yaml
@@ -45,7 +45,7 @@ model:
   ctc_grad_norm_type: null
 
 training:
-  n_epoch: 10
+  n_epoch: 5
   accum_grad: 1
   lr: 1e-5 
   lr_decay: 0.8 
diff --git a/examples/tiny/s0/conf/deepspeech2_online.yaml b/examples/tiny/s0/conf/deepspeech2_online.yaml
index 5a8294adb..334b1d31c 100644
--- a/examples/tiny/s0/conf/deepspeech2_online.yaml
+++ b/examples/tiny/s0/conf/deepspeech2_online.yaml
@@ -47,7 +47,7 @@ model:
   ctc_grad_norm_type: null
 
 training:
-  n_epoch: 10
+  n_epoch: 5
   accum_grad: 1
   lr: 1e-5 
   lr_decay: 1.0 
diff --git a/examples/tiny/s0/local/test.sh b/examples/tiny/s0/local/test.sh
index 4d00f30b8..25dd04374 100755
--- a/examples/tiny/s0/local/test.sh
+++ b/examples/tiny/s0/local/test.sh
@@ -13,7 +13,7 @@ ckpt_prefix=$2
 model_type=$3
 
 # download language model
-bash local/download_lm_en.sh
+bash local/download_lm_en.sh > /dev/null 2>&1
 if [ $? -ne 0 ]; then
    exit 1
 fi
diff --git a/examples/tiny/s1/conf/chunk_confermer.yaml b/examples/tiny/s1/conf/chunk_confermer.yaml
index b14b4b212..c51866697 100644
--- a/examples/tiny/s1/conf/chunk_confermer.yaml
+++ b/examples/tiny/s1/conf/chunk_confermer.yaml
@@ -83,7 +83,7 @@ model:
 
 
 training:
-  n_epoch: 20
+  n_epoch: 5
   accum_grad: 1
   global_grad_clip: 5.0
   optim: adam
diff --git a/examples/tiny/s1/conf/chunk_transformer.yaml b/examples/tiny/s1/conf/chunk_transformer.yaml
index 38edbf358..29c30b262 100644
--- a/examples/tiny/s1/conf/chunk_transformer.yaml
+++ b/examples/tiny/s1/conf/chunk_transformer.yaml
@@ -76,7 +76,7 @@ model:
 
 
 training:
-  n_epoch: 20
+  n_epoch: 5
   accum_grad: 1
   global_grad_clip: 5.0
   optim: adam
diff --git a/examples/tiny/s1/conf/conformer.yaml b/examples/tiny/s1/conf/conformer.yaml
index 0b06b2b72..8487da771 100644
--- a/examples/tiny/s1/conf/conformer.yaml
+++ b/examples/tiny/s1/conf/conformer.yaml
@@ -79,7 +79,7 @@ model:
 
 
 training:
-  n_epoch: 20
+  n_epoch: 5
   accum_grad: 4
   global_grad_clip: 5.0
   optim: adam
diff --git a/examples/tiny/s1/conf/transformer.yaml b/examples/tiny/s1/conf/transformer.yaml
index 1c6f9e022..cc9b5c515 100644
--- a/examples/tiny/s1/conf/transformer.yaml
+++ b/examples/tiny/s1/conf/transformer.yaml
@@ -73,7 +73,7 @@ model:
 
 
 training:
-  n_epoch: 21
+  n_epoch: 5
   accum_grad: 1
   global_grad_clip: 5.0
   optim: adam