From e80c7411316d871b56ece0ce33039173c025afb7 Mon Sep 17 00:00:00 2001
From: Hui Zhang <zhtclz@foxmail.com>
Date: Tue, 23 Feb 2021 06:05:38 +0000
Subject: [PATCH] fix tester

---
 examples/aishell/local/infer.sh | 29 +++++------------------------
 examples/aishell/local/train.sh | 30 +-----------------------------
 model_utils/model.py            | 28 ++++++++++++++++++++++++++--
 model_utils/network.py          | 18 ++++++++++++++++++
 training/trainer.py             |  7 +++++++
 5 files changed, 57 insertions(+), 55 deletions(-)

diff --git a/examples/aishell/local/infer.sh b/examples/aishell/local/infer.sh
index 90be581be..d794ccc4e 100644
--- a/examples/aishell/local/infer.sh
+++ b/examples/aishell/local/infer.sh
@@ -9,31 +9,12 @@ if [ $? -ne 0 ]; then
 fi
 cd - > /dev/null
 
-
-# infer
-CUDA_VISIBLE_DEVICES=0 \
 python3 -u ${MAIN_ROOT}/infer.py \
---num_samples=10 \
---beam_size=300 \
---num_proc_bsearch=8 \
---num_conv_layers=2 \
---num_rnn_layers=3 \
---rnn_layer_size=1024 \
---alpha=2.6 \
---beta=5.0 \
---cutoff_prob=0.99 \
---cutoff_top_n=40 \
---use_gru=True \
---use_gpu=True \
---share_rnn_weights=False \
---infer_manifest="data/manifest.test" \
---mean_std_path="data/mean_std.npz" \
---vocab_path="data/vocab.txt" \
---model_path="checkpoints/step_final" \
---lang_model_path="${MAIN_ROOT}/models/lm/zh_giga.no_cna_cmn.prune01244.klm" \
---decoding_method="ctc_beam_search" \
---error_rate_type="cer" \
---specgram_type="linear"
+--device 'gpu' \
+--nproc 1 \
+--config conf/deepspeech2.yaml \
+--checkpoint_path ckpt/checkpoints/step-3283
+
 
 if [ $? -ne 0 ]; then
     echo "Failed in inference!"
diff --git a/examples/aishell/local/train.sh b/examples/aishell/local/train.sh
index bbe69b8b1..ce30c4a11 100644
--- a/examples/aishell/local/train.sh
+++ b/examples/aishell/local/train.sh
@@ -4,39 +4,11 @@
 # if you wish to resume from an exists model, uncomment --init_from_pretrained_model
 export FLAGS_sync_nccl_allreduce=0
 
-#CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
-#python3 -u ${MAIN_ROOT}/train.py \
-#--batch_size=64 \
-#--num_epoch=50 \
-#--num_conv_layers=2 \
-#--num_rnn_layers=3 \
-#--rnn_layer_size=1024 \
-#--num_iter_print=100 \
-#--save_epoch=1 \
-#--num_samples=120000 \
-#--learning_rate=5e-4 \
-#--max_duration=27.0 \
-#--min_duration=0.0 \
-#--test_off=False \
-#--use_sortagrad=True \
-#--use_gru=True \
-#--use_gpu=True \
-#--is_local=True \
-#--share_rnn_weights=False \
-#--train_manifest="data/manifest.train" \
-#--dev_manifest="data/manifest.dev" \
-#--mean_std_path="data/mean_std.npz" \
-#--vocab_path="data/vocab.txt" \
-#--output_model_dir="./checkpoints" \
-#--augment_conf_path="${MAIN_ROOT}/conf/augmentation.config" \
-#--specgram_type="linear" \
-#--shuffle_method="batch_shuffle_clipped" \
-
 python3 -u ${MAIN_ROOT}/train.py \
 --device 'gpu' \
 --nproc 4 \
 --config conf/deepspeech2.yaml \
---output ckpt
+--output ckpt-${1}
 
 
 if [ $? -ne 0 ]; then
diff --git a/model_utils/model.py b/model_utils/model.py
index b60e87883..d4106f344 100644
--- a/model_utils/model.py
+++ b/model_utils/model.py
@@ -21,6 +21,7 @@ import logging
 import numpy as np
 from collections import defaultdict
 from functools import partial
+from pathlib import Path
 
 import paddle
 from paddle import distributed as dist
@@ -449,6 +450,30 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
             error_rate_type, num_ins, num_ins, errors_sum / len_refs)
         self.logger.info(msg)
 
+    def setup_output_dir(self):
+        """Create the ``infer`` directory that receives tester output.
+
+        It lives under ``args.output`` if set, else beside the checkpoint.
+        """
+        if self.args.output:
+            base_dir = Path(self.args.output).expanduser()
+        else:
+            base_dir = Path(self.args.checkpoint_path).expanduser().parent
+        infer_dir = base_dir / "infer"
+        infer_dir.mkdir(parents=True, exist_ok=True)
+        self.output_dir = infer_dir
+
+    # def setup_checkpointer(self):
+    #     """Create a directory used to save checkpoints into.
+        
+    #     It is "checkpoints" inside the output directory.
+    #     """
+    #     # checkpoint dir
+    #     checkpoint_dir = self.output_dir / "checkpoints"
+    #     checkpoint_dir.mkdir(exist_ok=True)
+
+    #     self.checkpoint_dir = checkpoint_dir
+
     def setup(self):
         """Setup the experiment.
         """
@@ -458,7 +483,6 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
 
         self.setup_output_dir()
         self.setup_logger()
-        self.setup_checkpointer()
 
         self.setup_dataloader()
         self.setup_model()
@@ -482,7 +506,7 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
             num_rnn_layers=config.model.num_rnn_layers,
             rnn_size=config.model.rnn_layer_size,
             share_rnn_weights=config.model.share_rnn_weights)
-
+        
         if self.parallel:
             model = paddle.DataParallel(model)
 
diff --git a/model_utils/network.py b/model_utils/network.py
index 03c6163e5..0ceb11cce 100644
--- a/model_utils/network.py
+++ b/model_utils/network.py
@@ -688,6 +688,24 @@ class DeepSpeech2(nn.Layer):
             probs, vocab_list, decoding_method, lang_model_path, beam_alpha,
             beam_beta, beam_size, cutoff_prob, cutoff_top_n, num_processes)
 
+    def from_pretrained(self, checkpoint_path):
+        """Load pretrained parameters into this model.
+
+        Parameters
+        ----------
+        checkpoint_path: Path or str
+            The path of pretrained model checkpoint, without extension name.
+
+        Returns
+        -------
+        DeepSpeech2
+            This model (``self``), after ``checkpoint.load_parameters`` ran.
+        """
+        # NOTE(review): ``checkpoint`` is not imported by this patch; confirm
+        # that network.py already has the project's checkpoint module in scope.
+        checkpoint.load_parameters(self, checkpoint_path=checkpoint_path)
+        return self
+
 
 def ctc_loss(logits,
              labels,
diff --git a/training/trainer.py b/training/trainer.py
index 930b82818..a64924c97 100644
--- a/training/trainer.py
+++ b/training/trainer.py
@@ -91,6 +91,9 @@ class Trainer():
         self.args = args
         self.optimizer = None
         self.visualizer = None
+        self.output_dir = None
+        self.checkpoint_dir = None
+        self.logger = None
 
     def setup(self):
         """Setup the experiment.
@@ -258,6 +261,10 @@ class Trainer():
         stream_handler.setFormatter(formatter)
         logger.addHandler(stream_handler)
 
+        if getattr(self, 'output_dir', None) is None:  # no dir: skip log file
+            self.logger = logger
+            return
+
         log_file = self.output_dir / 'worker_{}.log'.format(dist.get_rank())
         # file_handler = logging.FileHandler(str(log_file))
         # file_handler.setFormatter(formatter)