diff --git a/paddlespeech/cli/README.md b/paddlespeech/cli/README.md index 4cea85b1..bd6572f1 100644 --- a/paddlespeech/cli/README.md +++ b/paddlespeech/cli/README.md @@ -5,5 +5,5 @@ ## Help `paddlespeech help` - ## S2T - `paddlespeech s2t --config ./s2t.yaml --input ./zh.wav --device gpu` + ## ASR + `paddlespeech asr --input ./test_audio.wav --device gpu` diff --git a/paddlespeech/cli/__init__.py b/paddlespeech/cli/__init__.py index 1cc7e27f..7e032904 100644 --- a/paddlespeech/cli/__init__.py +++ b/paddlespeech/cli/__init__.py @@ -11,6 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from .asr import ASRExecutor from .base_commands import BaseCommand from .base_commands import HelpCommand -from .s2t import S2TExecutor diff --git a/paddlespeech/cli/s2t/__init__.py b/paddlespeech/cli/asr/__init__.py similarity index 95% rename from paddlespeech/cli/s2t/__init__.py rename to paddlespeech/cli/asr/__init__.py index 57e814b9..8ab0991f 100644 --- a/paddlespeech/cli/s2t/__init__.py +++ b/paddlespeech/cli/asr/__init__.py @@ -11,4 +11,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from .infer import S2TExecutor +from .infer import ASRExecutor diff --git a/paddlespeech/cli/s2t/infer.py b/paddlespeech/cli/asr/infer.py similarity index 95% rename from paddlespeech/cli/s2t/infer.py rename to paddlespeech/cli/asr/infer.py index b3507cb6..60516380 100644 --- a/paddlespeech/cli/s2t/infer.py +++ b/paddlespeech/cli/asr/infer.py @@ -33,7 +33,7 @@ from paddlespeech.s2t.transform.transformation import Transformation from paddlespeech.s2t.utils.dynamic_import import dynamic_import from paddlespeech.s2t.utils.utility import UpdateConfig -__all__ = ['S2TExecutor'] +__all__ = ['ASRExecutor'] pretrained_models = { "wenetspeech_zh": { @@ -58,13 +58,15 @@ model_alias = { @cli_register( - name='paddlespeech.s2t', description='Speech to text infer command.') -class S2TExecutor(BaseExecutor): + name='paddlespeech.asr', description='Speech to text infer command.') +class ASRExecutor(BaseExecutor): def __init__(self): - super(S2TExecutor, self).__init__() + super(ASRExecutor, self).__init__() self.parser = argparse.ArgumentParser( - prog='paddlespeech.s2t', add_help=True) + prog='paddlespeech.asr', add_help=True) + self.parser.add_argument( + '--input', type=str, required=True, help='Audio file to recognize.') self.parser.add_argument( '--model', type=str, @@ -76,16 +78,12 @@ class S2TExecutor(BaseExecutor): '--config', type=str, default=None, - help='Config of s2t task. Use deault config when it is None.') + help='Config of asr task. Use default config when it is None.') self.parser.add_argument( '--ckpt_path', type=str, default=None, help='Checkpoint file of model.') - self.parser.add_argument( - '--input', - type=str, - help='Audio file to recognize.') self.parser.add_argument( '--device', type=str, @@ -178,13 +176,12 @@ class S2TExecutor(BaseExecutor): def preprocess(self, input: Union[str, os.PathLike]): """ Input preprocess and return paddle.Tensor stored in self.input. - Input content can be a text(t2s), a file(s2t, cls) or a streaming(not supported yet). 
+ Input content can be a text(tts), a file(asr, cls) or a streaming(not supported yet). """ parser_args = self.parser_args config = self.config audio_file = input - #print("audio_file", audio_file) logger.info("audio_file" + audio_file) self.sr = config.collator.target_sample_rate @@ -290,7 +287,6 @@ class S2TExecutor(BaseExecutor): Command line entry. """ self.parser_args = self.parser.parse_args(argv) - print(self.parser_args) model = self.parser_args.model lang = self.parser_args.lang @@ -301,7 +297,7 @@ class S2TExecutor(BaseExecutor): try: res = self(model, lang, config, ckpt_path, audio_file, device) - print(res) + logger.info('ASR Result: {}'.format(res)) return True except Exception as e: print(e) @@ -314,6 +310,6 @@ class S2TExecutor(BaseExecutor): self._init_from_path(model, lang, config, ckpt_path) self.preprocess(audio_file) self.infer() - res = self.postprocess() # Retrieve result of s2t. + res = self.postprocess() # Retrieve result of asr. return res diff --git a/paddlespeech/cli/entry.py b/paddlespeech/cli/entry.py index 726cff1a..32123ece 100644 --- a/paddlespeech/cli/entry.py +++ b/paddlespeech/cli/entry.py @@ -23,9 +23,12 @@ def _CommandDict(): def _execute(): com = commands - for idx, _argv in enumerate(['paddlespeech'] + sys.argv[1:]): + + idx = 0 + for _argv in (['paddlespeech'] + sys.argv[1:]): if _argv not in com: break + idx += 1 com = com[_argv] # The method 'execute' of a command instance returns 'True' for a success diff --git a/paddlespeech/cli/executor.py b/paddlespeech/cli/executor.py index 2314bd6d..e307a287 100644 --- a/paddlespeech/cli/executor.py +++ b/paddlespeech/cli/executor.py @@ -47,7 +47,7 @@ class BaseExecutor(ABC): def preprocess(self, input: Union[str, os.PathLike]): """ Input preprocess and return paddle.Tensor stored in self.input. - Input content can be a text(t2s), a file(s2t, cls) or a streaming(not supported yet). + Input content can be a text(tts), a file(asr, cls) or a streaming(not supported yet). 
""" pass diff --git a/paddlespeech/cli/t2s/__init.__py b/paddlespeech/cli/tts/__init.__py similarity index 100% rename from paddlespeech/cli/t2s/__init.__py rename to paddlespeech/cli/tts/__init.__py