PaddleSpeech/paddlespeech/cli/executor.py

# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from abc import ABC
from abc import abstractmethod
from typing import Any
from typing import Dict
from typing import List
from typing import Union

import paddle


class BaseExecutor(ABC):
    """
        An abstract executor of paddlespeech tasks.
    """

    def __init__(self):
        self._inputs = dict()
        self._outputs = dict()

    @abstractmethod
    def _get_pretrained_path(self, tag: str) -> os.PathLike:
        """
        Download and returns pretrained resources path of current task.

        Args:
            tag (str): A tag of pretrained model.

        Returns:
            os.PathLike: The path on which resources of pretrained model locate. 
        """
        pass

    @abstractmethod
    def _init_from_path(self, *args, **kwargs):
        """
        Init model and other resources from arguments. This method should be called by `__call__()`.
        """
        pass

    @abstractmethod
    def preprocess(self, input: Any, *args, **kwargs):
        """
        Input preprocess and return paddle.Tensor stored in self._inputs.
        Input content can be a text(tts), a file(asr, cls), a stream(not supported yet) or anything needed.

        Args:
            input (Any): Input text/file/stream or other content.
        """
        pass

    @paddle.no_grad()
    @abstractmethod
    def infer(self, *args, **kwargs):
        """
        Model inference and put results into self._outputs.
        This method get input tensors from self._inputs, and write output tensors into self._outputs.
        """
        pass

    @abstractmethod
    def postprocess(self, *args, **kwargs) -> Union[str, os.PathLike]:
        """
        Output postprocess and return results.
        This method get model output from self._outputs and convert it into human-readable results.

        Returns:
            Union[str, os.PathLike]: Human-readable results such as texts and audio files.
        """
        pass

    @abstractmethod
    def execute(self, argv: List[str]) -> bool:
        """
        Command line entry. This method can only be accessed by a command line such as `paddlespeech asr`.

        Args:
            argv (List[str]): Arguments from command line.

        Returns:
            int: Result of the command execution. `True` for a success and `False` for a failure.
        """
        pass

    @abstractmethod
    def __call__(self, *arg, **kwargs):
        """
        Python API to call an executor.
        """
        pass

    def _is_job_input(self, input_: Union[str, os.PathLike]) -> bool:
        """
        Check if current input file is a job input or not.

        Args:
            input_ (Union[str, os.PathLike]): Input file of current task.

        Returns:
            bool: return `True` for job input, `False` otherwise.
        """
        return os.path.isfile(input_) and input_.endswith('.job')

    def _job_preprocess(self, job_input: os.PathLike) -> Dict[str, str]:
        """
        Read a job input file and return its contents in a dictionary.

        Args:
            job_input (os.PathLike): The job input file.

        Returns:
            Dict[str, str]: Contents of job input.
        """
        job_contents = {}
        with open(job_input) as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                k, v = line.split(' ')
                job_contents[k] = v
        return job_contents

    def _job_postprecess(self, job_outputs: Dict[str, str]) -> str:
        """
        Convert job results to string.

        Args:
            job_outputs (Dict[str, str]): A dictionary with job ids and results.

        Returns:
            str: A string object contains job outputs.
        """
        ret = ''
        for k, v in job_outputs.items():
            ret += f'{k} {v}\n'
        return ret
Add paddlespeech.cli. 3 years ago			`# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`
			`import os`
			`from abc import ABC`
			`from abc import abstractmethod`
Update doc strings. 3 years ago			`from typing import Any`
Support batch input in cls task. 3 years ago			`from typing import Dict`
Add python api for executor. 3 years ago			`from typing import List`
Add paddlespeech.cli. 3 years ago			`from typing import Union`

			`import paddle`


			`class BaseExecutor(ABC):`
			`"""`
			`An abstract executor of paddlespeech tasks.`
			`"""`

			`def __init__(self):`
Update inputs and outputs of executor. 3 years ago			`self._inputs = dict()`
			`self._outputs = dict()`
Add paddlespeech.cli. 3 years ago
			`@abstractmethod`
Update asr inference in paddlespeech.cli. 3 years ago			`def _get_pretrained_path(self, tag: str) -> os.PathLike:`
Add paddlespeech.cli. 3 years ago			`"""`
Update doc strings. 3 years ago			`Download and returns pretrained resources path of current task.`

			`Args:`
			`tag (str): A tag of pretrained model.`

			`Returns:`
			`os.PathLike: The path on which resources of pretrained model locate.`
Add paddlespeech.cli. 3 years ago			`"""`
			`pass`

			`@abstractmethod`
Update asr inference in paddlespeech.cli. 3 years ago			`def _init_from_path(self, args, *kwargs):`
Add paddlespeech.cli. 3 years ago			`"""`
Update doc strings. 3 years ago			Init model and other resources from arguments. This method should be called by `__call__()`.
Add paddlespeech.cli. 3 years ago			`"""`
			`pass`

			`@abstractmethod`
Update doc strings. 3 years ago			`def preprocess(self, input: Any, args, *kwargs):`
Add paddlespeech.cli. 3 years ago			`"""`
Update doc strings. 3 years ago			`Input preprocess and return paddle.Tensor stored in self._inputs.`
			`Input content can be a text(tts), a file(asr, cls), a stream(not supported yet) or anything needed.`

			`Args:`
			`input (Any): Input text/file/stream or other content.`
Add paddlespeech.cli. 3 years ago			`"""`
			`pass`

			`@paddle.no_grad()`
			`@abstractmethod`
Update doc strings. 3 years ago			`def infer(self, args, *kwargs):`
Add paddlespeech.cli. 3 years ago			`"""`
Update doc strings. 3 years ago			`Model inference and put results into self._outputs.`
			`This method get input tensors from self._inputs, and write output tensors into self._outputs.`
Add paddlespeech.cli. 3 years ago			`"""`
			`pass`

			`@abstractmethod`
Update doc strings. 3 years ago			`def postprocess(self, args, *kwargs) -> Union[str, os.PathLike]:`
Add paddlespeech.cli. 3 years ago			`"""`
Update doc strings. 3 years ago			`Output postprocess and return results.`
			`This method get model output from self._outputs and convert it into human-readable results.`

			`Returns:`
			`Union[str, os.PathLike]: Human-readable results such as texts and audio files.`
Add paddlespeech.cli. 3 years ago			`"""`
			`pass`
Add python api for executor. 3 years ago
			`@abstractmethod`
			`def execute(self, argv: List[str]) -> bool:`
			`"""`
Update doc strings. 3 years ago			Command line entry. This method can only be accessed by a command line such as `paddlespeech asr`.

			`Args:`
			`argv (List[str]): Arguments from command line.`

			`Returns:`
			int: Result of the command execution. `True` for a success and `False` for a failure.
Add python api for executor. 3 years ago			`"""`
			`pass`

			`@abstractmethod`
			`def __call__(self, arg, *kwargs):`
			`"""`
Update doc strings. 3 years ago			`Python API to call an executor.`
Add python api for executor. 3 years ago			`"""`
			`pass`
Support batch input in cls task. 3 years ago
			`def _is_job_input(self, input_: Union[str, os.PathLike]) -> bool:`
			`"""`
			`Check if current input file is a job input or not.`

			`Args:`
			`input_ (Union[str, os.PathLike]): Input file of current task.`

			`Returns:`
			bool: return `True` for job input, `False` otherwise.
			`"""`
			`return os.path.isfile(input_) and input_.endswith('.job')`

			`def _job_preprocess(self, job_input: os.PathLike) -> Dict[str, str]:`
			`"""`
			`Read a job input file and return its contents in a dictionary.`

			`Args:`
			`job_input (os.PathLike): The job input file.`

			`Returns:`
			`Dict[str, str]: Contents of job input.`
			`"""`
			`job_contents = {}`
			`with open(job_input) as f:`
			`for line in f:`
			`line = line.strip()`
			`if not line:`
			`continue`
			`k, v = line.split(' ')`
			`job_contents[k] = v`
			`return job_contents`

			`def _job_postprecess(self, job_outputs: Dict[str, str]) -> str:`
			`"""`
			`Convert job results to string.`

			`Args:`
			`job_outputs (Dict[str, str]): A dictionary with job ids and results.`

			`Returns:`
			`str: A string object contains job outputs.`
			`"""`
			`ret = ''`
			`for k, v in job_outputs.items():`
			`ret += f'{k} {v}\n'`
			`return ret`