Rename s2t to asr.

3 years ago · 000294132c
parent 03f5c7a544
commit 000294132c
7 changed files with 20 additions and 21 deletions
--- a/paddlespeech/cli/README.md
+++ b/paddlespeech/cli/README.md
@ -5,5 +5,5 @@
 ## Help
 `paddlespeech help`

- ## S2T
- `paddlespeech s2t --config ./s2t.yaml --input ./zh.wav --device gpu`
+ ## ASR
+ `paddlespeech asr --input ./test_audio.wav --device gpu`
--- a/paddlespeech/cli/__init__.py
+++ b/paddlespeech/cli/__init__.py
@ -11,6 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from .asr import ASRExecutor
 from .base_commands import BaseCommand
 from .base_commands import HelpCommand
-from .s2t import S2TExecutor
--- a/paddlespeech/cli/asr/__init__.py
+++ b/paddlespeech/cli/asr/__init__.py
@ -11,4 +11,4 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from .infer import S2TExecutor
+from .infer import ASRExecutor
--- a/paddlespeech/cli/asr/infer.py
+++ b/paddlespeech/cli/asr/infer.py
@ -33,7 +33,7 @@ from paddlespeech.s2t.transform.transformation import Transformation
 from paddlespeech.s2t.utils.dynamic_import import dynamic_import
 from paddlespeech.s2t.utils.utility import UpdateConfig

-__all__ = ['S2TExecutor']
+__all__ = ['ASRExecutor']

 pretrained_models = {
    "wenetspeech_zh": {
@ -58,13 +58,15 @@ model_alias = {


@cli_register(
-    name='paddlespeech.s2t', description='Speech to text infer command.')
-class S2TExecutor(BaseExecutor):
+    name='paddlespeech.asr', description='Speech to text infer command.')
+class ASRExecutor(BaseExecutor):
    def __init__(self):
-        super(S2TExecutor, self).__init__()
+        super(ASRExecutor, self).__init__()

        self.parser = argparse.ArgumentParser(
-            prog='paddlespeech.s2t', add_help=True)
+            prog='paddlespeech.asr', add_help=True)
+        self.parser.add_argument(
+            '--input', type=str, required=True, help='Audio file to recognize.')
        self.parser.add_argument(
            '--model',
            type=str,
@ -76,16 +78,12 @@ class S2TExecutor(BaseExecutor):
            '--config',
            type=str,
            default=None,
-            help='Config of s2t task. Use deault config when it is None.')
+            help='Config of asr task. Use deault config when it is None.')
        self.parser.add_argument(
            '--ckpt_path',
            type=str,
            default=None,
            help='Checkpoint file of model.')
-        self.parser.add_argument(
-            '--input',
-            type=str,
-            help='Audio file to recognize.')
        self.parser.add_argument(
            '--device',
            type=str,
@ -178,13 +176,12 @@ class S2TExecutor(BaseExecutor):
    def preprocess(self, input: Union[str, os.PathLike]):
        """
            Input preprocess and return paddle.Tensor stored in self.input.
-            Input content can be a text(t2s), a file(s2t, cls) or a streaming(not supported yet).
+            Input content can be a text(tts), a file(asr, cls) or a streaming(not supported yet).
        """

        parser_args = self.parser_args
        config = self.config
        audio_file = input
-        #print("audio_file", audio_file)
        logger.info("audio_file" + audio_file)

        self.sr = config.collator.target_sample_rate
@ -290,7 +287,6 @@ class S2TExecutor(BaseExecutor):
            Command line entry.
        """
        self.parser_args = self.parser.parse_args(argv)
-        print(self.parser_args)

        model = self.parser_args.model
        lang = self.parser_args.lang
@ -301,7 +297,7 @@ class S2TExecutor(BaseExecutor):

        try:
            res = self(model, lang, config, ckpt_path, audio_file, device)
-            print(res)
+            logger.info('ASR Result: {}'.format(res))
            return True
        except Exception as e:
            print(e)
@ -314,6 +310,6 @@ class S2TExecutor(BaseExecutor):
        self._init_from_path(model, lang, config, ckpt_path)
        self.preprocess(audio_file)
        self.infer()
-        res = self.postprocess()  # Retrieve result of s2t.
+        res = self.postprocess()  # Retrieve result of asr.

        return res
--- a/paddlespeech/cli/entry.py
+++ b/paddlespeech/cli/entry.py
@ -23,9 +23,12 @@ def _CommandDict():

 def _execute():
    com = commands
-    for idx, _argv in enumerate(['paddlespeech'] + sys.argv[1:]):
+
+    idx = 0
+    for _argv in (['paddlespeech'] + sys.argv[1:]):
        if _argv not in com:
            break
+        idx += 1
        com = com[_argv]

    # The method 'execute' of a command instance returns 'True' for a success
--- a/paddlespeech/cli/executor.py
+++ b/paddlespeech/cli/executor.py
@ -47,7 +47,7 @@ class BaseExecutor(ABC):
    def preprocess(self, input: Union[str, os.PathLike]):
        """
            Input preprocess and return paddle.Tensor stored in self.input.
-            Input content can be a text(t2s), a file(s2t, cls) or a streaming(not supported yet).
+            Input content can be a text(tts), a file(asr, cls) or a streaming(not supported yet).
        """
        pass

--- a/paddlespeech/cli/tts/__init__.py
+++ b/paddlespeech/cli/tts/__init__.py