From b0356ae4892c85984804ecc1fda1f9cf4d5018ac Mon Sep 17 00:00:00 2001
From: huangyuxin <hyxin2014@126.com>
Date: Thu, 2 Dec 2021 05:58:20 +0000
Subject: [PATCH 1/2] revise

---
 paddlespeech/cli/asr/infer.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/paddlespeech/cli/asr/infer.py b/paddlespeech/cli/asr/infer.py
index 6ae03853..640cf729 100644
--- a/paddlespeech/cli/asr/infer.py
+++ b/paddlespeech/cli/asr/infer.py
@@ -22,6 +22,7 @@ import librosa
 import paddle
 import soundfile
 from yacs.config import CfgNode
+import numpy as np
 
 from ..executor import BaseExecutor
 from ..utils import cli_register
@@ -81,6 +82,7 @@ class ASRExecutor(BaseExecutor):
             "--sr",
             type=int,
             default=16000,
+            choices=[8000, 16000],
             help='Choose the audio sample rate of the model. 8000 or 16000')
         self.parser.add_argument(
             '--config',
@@ -131,13 +133,13 @@ class ASRExecutor(BaseExecutor):
             self.cfg_path = os.path.join(res_path,
                                          pretrained_models[tag]['cfg_path'])
             self.ckpt_path = os.path.join(res_path,
-                                          pretrained_models[tag]['ckpt_path'])
+                                          pretrained_models[tag]['ckpt_path'] + ".pdparams")
             logger.info(res_path)
             logger.info(self.cfg_path)
             logger.info(self.ckpt_path)
         else:
             self.cfg_path = os.path.abspath(cfg_path)
-            self.ckpt_path = os.path.abspath(ckpt_path)
+            self.ckpt_path = os.path.abspath(ckpt_path + ".pdparams")
             res_path = os.path.dirname(
                 os.path.dirname(os.path.abspath(self.cfg_path)))
 
@@ -183,8 +185,7 @@ class ASRExecutor(BaseExecutor):
         self.model.eval()
 
         # load model
-        params_path = self.ckpt_path + ".pdparams"
-        model_dict = paddle.load(params_path)
+        model_dict = paddle.load(self.ckpt_path)
         self.model.set_state_dict(model_dict)
 
     def preprocess(self, model_type: str, input: Union[str, os.PathLike]):
@@ -227,11 +228,16 @@ class ASRExecutor(BaseExecutor):
                     audio = audio.mean(axis=1)
                 else:
                     audio = audio[:, 0]
+                # pcm16 -> pcm 32
                 audio = audio.astype("float32")
+                bits = np.iinfo(np.int16).bits
+                audio = audio / (2**(bits - 1))
                 audio = librosa.resample(audio, audio_sample_rate,
                                          self.sample_rate)
                 audio_sample_rate = self.sample_rate
-                audio = audio.astype("int16")
+                # pcm16 -> pcm 32
+                audio = audio * (2**(bits - 1))
+                audio = np.round(audio).astype("int16")
             else:
                 audio = audio[:, 0]
 
@@ -341,7 +347,7 @@ class ASRExecutor(BaseExecutor):
                 "The sample rate of the input file is not {}.\n \
                             The program will resample the wav file to {}.\n \
                             If the result does not meet your expectations，\n \
-                            Please input the 16k 16bit 1 channel wav file. \
+                            Please input the 16k 16 bit 1 channel wav file. \
                         "
                 .format(self.sample_rate, self.sample_rate))
             while (True):

From 8ec576f477603269a677d11ddd87a56163a656aa Mon Sep 17 00:00:00 2001
From: Jackwaterveg <87408988+Jackwaterveg@users.noreply.github.com>
Date: Thu, 2 Dec 2021 15:03:04 +0800
Subject: [PATCH 2/2] Update infer.py

---
 paddlespeech/cli/asr/infer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paddlespeech/cli/asr/infer.py b/paddlespeech/cli/asr/infer.py
index 640cf729..66a2f169 100644
--- a/paddlespeech/cli/asr/infer.py
+++ b/paddlespeech/cli/asr/infer.py
@@ -235,7 +235,7 @@ class ASRExecutor(BaseExecutor):
                 audio = librosa.resample(audio, audio_sample_rate,
                                          self.sample_rate)
                 audio_sample_rate = self.sample_rate
-                # pcm16 -> pcm 32
+                # pcm32 -> pcm 16
                 audio = audio * (2**(bits - 1))
                 audio = np.round(audio).astype("int16")
             else: