fix coding style && mv audio docs

3 years ago · 750e0bdddd
parent e66d1b7d96
commit 750e0bdddd
91 changed files with 781 additions and 1116 deletions
--- a/audio/docs/Makefile
+++ b/audio/docs/Makefile
@ -1,19 +0,0 @@
 # Minimal makefile for Sphinx documentation
 #
 # You can set these variables from the command line.
 SPHINXOPTS    =
 SPHINXBUILD   = sphinx-build
 SOURCEDIR     = source
 BUILDDIR      = build
 # Put it first so that "make" without argument is like "make help".
 help:
 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
 .PHONY: help Makefile
 # Catch-all target: route all unknown targets to Sphinx using the new
 # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
 %: Makefile
 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--- a/audio/docs/README.md
+++ b/audio/docs/README.md
@ -1,24 +0,0 @@
 # Build docs for PaddleAudio
 Execute the following steps in the **current directory**.
 ## 1. Install
 `pip install Sphinx sphinx_rtd_theme`
 ## 2. Generate API docs
 Generate API docs from docstrings.
 `sphinx-apidoc -fMeT -o source ../paddleaudio ../paddleaudio/utils --templatedir source/_templates`
 ## 3. Build
 `sphinx-build source _html`
 ## 4. Preview
 Open `_html/index.html` for page preview.
--- a/audio/docs/images/paddle.png
+++ b/audio/docs/images/paddle.png
--- a/audio/docs/make.bat
+++ b/audio/docs/make.bat
@ -1,35 +0,0 @@
@ECHO OFF
 pushd %~dp0
 REM Command file for Sphinx documentation
 if "%SPHINXBUILD%" == "" (
 	set SPHINXBUILD=sphinx-build
 )
 set SOURCEDIR=source
 set BUILDDIR=build
 if "%1" == "" goto help
 %SPHINXBUILD% >NUL 2>NUL
 if errorlevel 9009 (
 	echo.
 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
 	echo.installed, then set the SPHINXBUILD environment variable to point
 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
 	echo.may add the Sphinx directory to PATH.
 	echo.
 	echo.If you don't have Sphinx installed, grab it from
 	echo.http://sphinx-doc.org/
 	exit /b 1
 )
 %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
 goto end
 :help
 %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
 :end
 popd
--- a/audio/docs/source/_static/custom.css
+++ b/audio/docs/source/_static/custom.css
@ -1,5 +0,0 @@
 .wy-nav-content {
    max-width: 80%;
 }
 .table table{ background:#b9b9b9} 
 .table table td{ background:#FFF; } 
--- a/audio/docs/source/_templates/module.rst_t
+++ b/audio/docs/source/_templates/module.rst_t
@ -1,9 +0,0 @@
 {%- if show_headings %}
 {{- basename | e | heading }}
 {% endif -%}
 .. automodule:: {{ qualname }}
 {%- for option in automodule_options %}
   :{{ option }}:
 {%- endfor %}
--- a/audio/docs/source/_templates/package.rst_t
+++ b/audio/docs/source/_templates/package.rst_t
@ -1,57 +0,0 @@
 {%- macro automodule(modname, options) -%}
 .. automodule:: {{ modname }}
 {%- for option in options %}
   :{{ option }}:
 {%- endfor %}
 {%- endmacro %}
 {%- macro toctree(docnames) -%}
 .. toctree::
   :maxdepth: {{ maxdepth }}
 {% for docname in docnames %}
   {{ docname }}
 {%- endfor %}
 {%- endmacro %}
 {%- if is_namespace %}
 {{- [pkgname, "namespace"] | join(" ") | e | heading }}
 {% else %}
 {{- pkgname | e | heading }}
 {% endif %}
 {%- if is_namespace %}
 .. py:module:: {{ pkgname }}
 {% endif %}
 {%- if modulefirst and not is_namespace %}
 {{ automodule(pkgname, automodule_options) }}
 {% endif %}
 {%- if subpackages %}
 Subpackages
 -----------
 {{ toctree(subpackages) }}
 {% endif %}
 {%- if submodules %}
 Submodules
 ----------
 {% if separatemodules %}
 {{ toctree(submodules) }}
 {% else %}
 {%- for submodule in submodules %}
 {% if show_headings %}
 {{- submodule | e | heading(2) }}
 {% endif %}
 {{ automodule(submodule, automodule_options) }}
 {% endfor %}
 {%- endif %}
 {%- endif %}
 {%- if not modulefirst and not is_namespace %}
 Module contents
 ---------------
 {{ automodule(pkgname, automodule_options) }}
 {% endif %}
--- a/audio/docs/source/_templates/toc.rst_t
+++ b/audio/docs/source/_templates/toc.rst_t
@ -1,8 +0,0 @@
 {{ header | heading }}
 .. toctree::
   :maxdepth: {{ maxdepth }}
 {% for docname in docnames %}
   {{ docname }}
 {%- endfor %}
--- a/audio/docs/source/conf.py
+++ b/audio/docs/source/conf.py
@ -1,181 +0,0 @@
 # -*- coding: utf-8 -*-
 #
 # Configuration file for the Sphinx documentation builder.
 #
 # This file only contains a selection of the most common options. For a
 # full list see the documentation:
 # http://www.sphinx-doc.org/en/master/config
 # -- Path setup --------------------------------------------------------------
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 import os
 import sys
 sys.path.insert(0, os.path.abspath('../..'))
 # -- Project information -----------------------------------------------------
 project = 'PaddleAudio'
 copyright = '2022, PaddlePaddle'
 author = 'PaddlePaddle'
 # The short X.Y version
 version = ''
 # The full version, including alpha/beta/rc tags
 release = '0.2.0'
 # -- General configuration ---------------------------------------------------
 # If your documentation needs a minimal Sphinx version, state it here.
 #
 # needs_sphinx = '1.0'
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
 extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.intersphinx',
    'sphinx.ext.mathjax',
    'sphinx.ext.viewcode',
    'sphinx.ext.napoleon',
 ]
 napoleon_google_docstring = True
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['_templates']
 # The suffix(es) of source filenames.
 # You can specify multiple suffix as a list of string:
 #
 # source_suffix = ['.rst', '.md']
 source_suffix = '.rst'
 # The master toctree document.
 master_doc = 'index'
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
 #
 # This is also used if you do content translation via gettext catalogs.
 # Usually you set "language" from the command line for these cases.
 language = None
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
 # This pattern also affects html_static_path and html_extra_path.
 exclude_patterns = []
 # The name of the Pygments (syntax highlighting) style to use.
 pygments_style = None
 # -- Options for HTML output -------------------------------------------------
 # The theme to use for HTML and HTML Help pages.  See the documentation for
 # a list of builtin themes.
 #
 import sphinx_rtd_theme
 html_theme = 'sphinx_rtd_theme'
 html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
 smartquotes = False
 # Theme options are theme-specific and customize the look and feel of a theme
 # further.  For a list of options available for each theme, see the
 # documentation.
 #
 # html_theme_options = {}
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
 html_static_path = ['_static']
 html_logo = '../images/paddle.png'
 html_css_files = [
    'custom.css',
 ]
 # Custom sidebar templates, must be a dictionary that maps document names
 # to template names.
 #
 # The default sidebars (for documents that don't match any pattern) are
 # defined by theme itself.  Builtin themes are using these templates by
 # default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
 # 'searchbox.html']``.
 #
 # html_sidebars = {}
 # -- Options for HTMLHelp output ---------------------------------------------
 # Output file base name for HTML help builder.
 htmlhelp_basename = 'PaddleAudiodoc'
 # -- Options for LaTeX output ------------------------------------------------
 latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #
    # 'papersize': 'letterpaper',
    # The font size ('10pt', '11pt' or '12pt').
    #
    # 'pointsize': '10pt',
    # Additional stuff for the LaTeX preamble.
    #
    # 'preamble': '',
    # Latex figure (float) alignment
    #
    # 'figure_align': 'htbp',
 }
 # Grouping the document tree into LaTeX files. List of tuples
 # (source start file, target name, title,
 #  author, documentclass [howto, manual, or own class]).
 latex_documents = [
    (master_doc, 'PaddleAudio.tex', 'PaddleAudio Documentation', 'PaddlePaddle',
     'manual'),
 ]
 # -- Options for manual page output ------------------------------------------
 # One entry per manual page. List of tuples
 # (source start file, name, description, authors, manual section).
 man_pages = [(master_doc, 'paddleaudio', 'PaddleAudio Documentation', [author],
              1)]
 # -- Options for Texinfo output ----------------------------------------------
 # Grouping the document tree into Texinfo files. List of tuples
 # (source start file, target name, title, author,
 #  dir menu entry, description, category)
 texinfo_documents = [
    (master_doc, 'PaddleAudio', 'PaddleAudio Documentation', author,
     'PaddleAudio', 'One line description of project.', 'Miscellaneous'),
 ]
 # -- Options for Epub output -------------------------------------------------
 # Bibliographic Dublin Core info.
 epub_title = project
 # The unique identifier of the text. This can be an ISBN number
 # or the project homepage.
 #
 # epub_identifier = ''
 # A unique identification for the text.
 #
 # epub_uid = ''
 # A list of files that should not be packed into the epub file.
 epub_exclude_files = ['search.html']
 # -- Extension configuration -------------------------------------------------
 # -- Options for intersphinx extension ---------------------------------------
 # Example configuration for intersphinx: refer to the Python standard library.
 intersphinx_mapping = {'https://docs.python.org/': None}
--- a/audio/docs/source/index.rst
+++ b/audio/docs/source/index.rst
@ -1,22 +0,0 @@
 .. PaddleAudio documentation master file, created by
   sphinx-quickstart on Tue Mar 22 15:57:16 2022.
   You can adapt this file completely to your liking, but it should at least
   contain the root `toctree` directive.
 Welcome to PaddleAudio's documentation!
 =======================================
 .. toctree::
   :maxdepth: 1
   Index <self>
 API References
 --------------
 .. toctree::
   :maxdepth: 2
   :titlesonly:
   paddleaudio
--- a/audio/paddleaudio/init.py
+++ b/audio/paddleaudio/init.py
@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from . import backends
 from . import compliance
 from . import datasets
 from . import features
@ -18,4 +19,3 @@ from . import functional
 from . import io
 from . import metric
 from . import sox_effects
 from . import backends
--- a/audio/paddleaudio/backends/init.py
+++ b/audio/paddleaudio/backends/init.py
@ -11,14 +11,13 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from . import utils
 from .soundfile_backend import depth_convert
 from .soundfile_backend import soundfile_load
 from .soundfile_backend import normalize
 from .soundfile_backend import resample
 from .soundfile_backend import soundfile_load
 from .soundfile_backend import soundfile_save
 from .soundfile_backend import to_mono
 from . import utils
 from .utils import get_audio_backend
 from .utils import list_audio_backends
 from .utils import set_audio_backend
--- a/audio/paddleaudio/backends/soundfile_backend.py
+++ b/audio/paddleaudio/backends/soundfile_backend.py
@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import os
 import warnings
 from typing import Optional
@ -204,6 +203,7 @@ def soundfile_save(y: np.ndarray, sr: int, file: os.PathLike) -> None:
    wavfile.write(file, sr, y_out)
 def soundfile_load(
        file: os.PathLike,
        sr: Optional[int]=None,
@ -256,9 +256,13 @@ def soundfile_load(
    y = depth_convert(y, dtype)
    return y, r
 #the code below is taken from: https://github.com/pytorch/audio/blob/main/torchaudio/backend/soundfile_backend.py with modification.
-def _get_subtype_for_wav(dtype: paddle.dtype, encoding: str, bits_per_sample: int):
+
 def _get_subtype_for_wav(dtype: paddle.dtype,
                         encoding: str,
                         bits_per_sample: int):
    if not encoding:
        if not bits_per_sample:
            subtype = {
@ -315,7 +319,10 @@ def _get_subtype_for_sphere(encoding: str, bits_per_sample: int):
    raise ValueError(f"sph does not support {encoding}.")
-def _get_subtype(dtype: paddle.dtype, format: str, encoding: str, bits_per_sample: int):
+def _get_subtype(dtype: paddle.dtype,
                 format: str,
                 encoding: str,
                 bits_per_sample: int):
    if format == "wav":
        return _get_subtype_for_wav(dtype, encoding, bits_per_sample)
    if format == "flac":
@ -328,7 +335,8 @@ def _get_subtype(dtype: paddle.dtype, format: str, encoding: str, bits_per_sampl
        return "PCM_S8" if bits_per_sample == 8 else f"PCM_{bits_per_sample}"
    if format in ("ogg", "vorbis"):
        if encoding or bits_per_sample:
-            raise ValueError("ogg/vorbis does not support encoding/bits_per_sample.")
+            raise ValueError(
                "ogg/vorbis does not support encoding/bits_per_sample.")
        return "VORBIS"
    if format == "sph":
        return _get_subtype_for_sphere(encoding, bits_per_sample)
@ -336,16 +344,16 @@ def _get_subtype(dtype: paddle.dtype, format: str, encoding: str, bits_per_sampl
        return "PCM_16"
    raise ValueError(f"Unsupported format: {format}")
 def save(
        filepath: str,
        src: paddle.Tensor,
        sample_rate: int,
-    channels_first: bool = True,
+        channels_first: bool=True,
-    compression: Optional[float] = None,
+        compression: Optional[float]=None,
-    format: Optional[str] = None,
+        format: Optional[str]=None,
-    encoding: Optional[str] = None,
+        encoding: Optional[str]=None,
-    bits_per_sample: Optional[int] = None,
+        bits_per_sample: Optional[int]=None, ):
 ):
    """Save audio data to file.
    Note:
@ -441,11 +449,11 @@ def save(
    if compression is not None:
        warnings.warn(
            '`save` function of "soundfile" backend does not support "compression" parameter. '
-            "The argument is silently ignored."
+            "The argument is silently ignored.")
        )
    if hasattr(filepath, "write"):
        if format is None:
-            raise RuntimeError("`format` is required when saving to file object.")
+            raise RuntimeError(
                "`format` is required when saving to file object.")
        ext = format.lower()
    else:
        ext = str(filepath).split(".")[-1].lower()
@ -455,8 +463,7 @@ def save(
    if bits_per_sample == 24:
        warnings.warn(
            "Saving audio with 24 bits per sample might warp samples near -1. "
-            "Using 16 bits per sample might be able to avoid this."
+            "Using 16 bits per sample might be able to avoid this.")
        )
    subtype = _get_subtype(src.dtype, ext, encoding, bits_per_sample)
    # sph is an extension used in TED-LIUM but soundfile does not recognize it as NIST format,
@ -467,7 +474,13 @@ def save(
    if channels_first:
        src = src.t()
-    soundfile.write(file=filepath, data=src, samplerate=sample_rate, subtype=subtype, format=format)
+    soundfile.write(
        file=filepath,
        data=src,
        samplerate=sample_rate,
        subtype=subtype,
        format=format)
 _SUBTYPE2DTYPE = {
    "PCM_S8": "int8",
@ -478,14 +491,14 @@ _SUBTYPE2DTYPE = {
    "DOUBLE": "float64",
 }
 def load(
        filepath: str,
-    frame_offset: int = 0,
+        frame_offset: int=0,
-    num_frames: int = -1,
+        num_frames: int=-1,
-    normalize: bool = True,
+        normalize: bool=True,
-    channels_first: bool = True,
+        channels_first: bool=True,
-    format: Optional[str] = None,
+        format: Optional[str]=None, ) -> Tuple[paddle.Tensor, int]:
 ) -> Tuple[paddle.Tensor, int]:
    """Load audio data from file.
    Note:
@ -564,7 +577,7 @@ def load(
    waveform = paddle.to_tensor(waveform)
    if channels_first:
-        waveform = paddle.transpose(waveform, perm=[1,0])
+        waveform = paddle.transpose(waveform, perm=[1, 0])
    return waveform, sample_rate
@ -588,7 +601,8 @@ _SUBTYPE_TO_BITS_PER_SAMPLE = {
    "ALAW": 8,  # A-Law encoded. See https://en.wikipedia.org/wiki/G.711#Types
    "IMA_ADPCM": 0,  # IMA ADPCM.
    "MS_ADPCM": 0,  # Microsoft ADPCM.
-    "GSM610": 0,  # GSM 6.10 encoding. (Wikipedia says 1.625 bit depth?? https://en.wikipedia.org/wiki/Full_Rate)
+    "GSM610":
    0,  # GSM 6.10 encoding. (Wikipedia says 1.625 bit depth?? https://en.wikipedia.org/wiki/Full_Rate)
    "VOX_ADPCM": 0,  # OKI / Dialogix ADPCM
    "G721_32": 0,  # 32kbs G721 ADPCM encoding.
    "G723_24": 0,  # 24kbs G723 ADPCM encoding.
@ -606,16 +620,17 @@ _SUBTYPE_TO_BITS_PER_SAMPLE = {
    "ALAC_32": 32,  # Apple Lossless Audio Codec (32 bit).
 }
 def _get_bit_depth(subtype):
    if subtype not in _SUBTYPE_TO_BITS_PER_SAMPLE:
        warnings.warn(
            f"The {subtype} subtype is unknown to PaddleAudio. As a result, the bits_per_sample "
            "attribute will be set to 0. If you are seeing this warning, please "
            "report by opening an issue on github (after checking for existing/closed ones). "
-            "You may otherwise ignore this warning."
+            "You may otherwise ignore this warning.")
        )
    return _SUBTYPE_TO_BITS_PER_SAMPLE.get(subtype, 0)
 _SUBTYPE_TO_ENCODING = {
    "PCM_S8": "PCM_S",
    "PCM_16": "PCM_S",
@ -629,12 +644,14 @@ _SUBTYPE_TO_ENCODING = {
    "VORBIS": "VORBIS",
 }
 def _get_encoding(format: str, subtype: str):
    if format == "FLAC":
        return "FLAC"
    return _SUBTYPE_TO_ENCODING.get(subtype, "UNKNOWN")
-def info(filepath: str, format: Optional[str] = None) -> AudioInfo:
+
 def info(filepath: str, format: Optional[str]=None) -> AudioInfo:
    """Get signal information of an audio file.
    Note:
@ -657,5 +674,4 @@ def info(filepath: str, format: Optional[str] = None) -> AudioInfo:
        sinfo.frames,
        sinfo.channels,
        bits_per_sample=_get_bit_depth(sinfo.subtype),
-        encoding=_get_encoding(sinfo.format, sinfo.subtype),
+        encoding=_get_encoding(sinfo.format, sinfo.subtype), )
    )
--- a/audio/paddleaudio/backends/sox_io_backend.py
+++ b/audio/paddleaudio/backends/sox_io_backend.py
@ -1,17 +1,17 @@
-from pathlib import Path
+import os
-from typing import Callable
+from typing import Optional
-from typing import Optional, Tuple, Union
+from typing import Tuple
 import paddle
 import paddleaudio
 from paddle import Tensor
 from .common import AudioInfo
 import os
 from paddleaudio._internal import module_utils as _mod_utils
 from .common import AudioInfo
 #https://github.com/pytorch/audio/blob/main/torchaudio/backend/sox_io_backend.py
 def _fail_info(filepath: str, format: Optional[str]) -> AudioInfo:
    """Fallback metadata reader that always fails.
    Args:
        filepath: Path that is reported in the error message.
        format: Accepted for signature compatibility; not used.
    Raises:
        RuntimeError: unconditionally.
    """
    message = "Failed to fetch metadata from {}".format(filepath)
    raise RuntimeError(message)
@ -23,72 +23,77 @@ def _fail_info_fileobj(fileobj, format: Optional[str]) -> AudioInfo:
 # Note: this needs to comply with TorchScript syntax -- annotations are required and f-strings are not allowed
 def _fail_load(
        filepath: str,
-    frame_offset: int = 0,
+        frame_offset: int=0,
-    num_frames: int = -1,
+        num_frames: int=-1,
-    normalize: bool = True,
+        normalize: bool=True,
-    channels_first: bool = True,
+        channels_first: bool=True,
-    format: Optional[str] = None,
+        format: Optional[str]=None, ) -> Tuple[Tensor, int]:
 ) -> Tuple[Tensor, int]:
    raise RuntimeError("Failed to load audio from {}".format(filepath))
 def _fail_load_fileobj(fileobj, *args, **kwargs):
    raise RuntimeError(f"Failed to load audio from {fileobj}")
 # Fallback handlers used when the sox extension returns no result.
 # Each alias points at a function that raises RuntimeError.
 _fallback_info = _fail_info
 _fallback_info_fileobj = _fail_info_fileobj
 _fallback_load = _fail_load
 # `load` calls `_fallback_load_fileobj` on its file-object path, so bind the
 # correctly spelled name; without it that path raises NameError instead of
 # the intended RuntimeError.
 _fallback_load_fileobj = _fail_load_fileobj
 # Misspelled alias kept for backward compatibility with existing references.
 _fallback_load_filebj = _fail_load_fileobj
@_mod_utils.requires_sox()
 def load(
        filepath: str,
-        frame_offset: int = 0,
+        frame_offset: int=0,
        num_frames: int=-1,
-        normalize: bool = True,
+        normalize: bool=True,
-        channels_first: bool = True,
+        channels_first: bool=True,
        format: Optional[str]=None, ) -> Tuple[Tensor, int]:
    if hasattr(filepath, "read"):
        ret = paddleaudio._paddleaudio.load_audio_fileobj(
-            filepath, frame_offset, num_frames, normalize, channels_first, format
+            filepath, frame_offset, num_frames, normalize, channels_first,
-        )
+            format)
        if ret is not None:
            audio_tensor = paddle.to_tensor(ret[0])
            return (audio_tensor, ret[1])
-        return _fallback_load_fileobj(filepath, frame_offset, num_frames, normalize, channels_first, format)
+        return _fallback_load_fileobj(filepath, frame_offset, num_frames,
                                      normalize, channels_first, format)
    filepath = os.fspath(filepath)
    ret = paddleaudio._paddleaudio.sox_io_load_audio_file(
-        filepath, frame_offset, num_frames, normalize, channels_first, format
+        filepath, frame_offset, num_frames, normalize, channels_first, format)
    )
    if ret is not None:
        audio_tensor = paddle.to_tensor(ret[0])
        return (audio_tensor, ret[1])
-    return _fallback_load(filepath, frame_offset, num_frames, normalize, channels_first, format)
+    return _fallback_load(filepath, frame_offset, num_frames, normalize,
                          channels_first, format)
@_mod_utils.requires_sox()
-def save(filepath: str,
+def save(
        filepath: str,
        src: Tensor,
        sample_rate: int,
-    channels_first: bool = True,
+        channels_first: bool=True,
-    compression: Optional[float] = None,
+        compression: Optional[float]=None,
-    format: Optional[str] = None,
+        format: Optional[str]=None,
-    encoding: Optional[str] = None,
+        encoding: Optional[str]=None,
-    bits_per_sample: Optional[int] = None,
+        bits_per_sample: Optional[int]=None, ):
 ):
    src_arr = src.numpy()
    if hasattr(filepath, "write"):
        paddleaudio._paddleaudio.save_audio_fileobj(
-            filepath, src_arr, sample_rate, channels_first, compression, format, encoding, bits_per_sample
+            filepath, src_arr, sample_rate, channels_first, compression, format,
-        )
+            encoding, bits_per_sample)
        return
    filepath = os.fspath(filepath)
    paddleaudio._paddleaudio.sox_io_save_audio_file(
-        filepath, src_arr, sample_rate, channels_first, compression, format, encoding, bits_per_sample
+        filepath, src_arr, sample_rate, channels_first, compression, format,
-    )
+        encoding, bits_per_sample)
@_mod_utils.requires_sox()
-def info(filepath: str, format: Optional[str] = None,) -> AudioInfo:
+def info(
        filepath: str,
        format: Optional[str]=None, ) -> AudioInfo:
    if hasattr(filepath, "read"):
        sinfo = paddleaudio._paddleaudio.get_info_fileobj(filepath, format)
        if sinfo is not None:
--- a/audio/paddleaudio/backends/utils.py
+++ b/audio/paddleaudio/backends/utils.py
@ -1,6 +1,5 @@
 """Defines utilities for switching audio backends"""
 #code is from: https://github.com/pytorch/audio/blob/main/torchaudio/backend/utils.py
 import warnings
 from typing import List
 from typing import Optional
@ -8,7 +7,9 @@ from typing import Optional
 import paddleaudio
 from paddleaudio._internal import module_utils as _mod_utils
-from . import no_backend, soundfile_backend, sox_io_backend
+from . import no_backend
 from . import soundfile_backend
 from . import sox_io_backend
 __all__ = [
    "list_audio_backends",
@ -55,6 +56,7 @@ def set_audio_backend(backend: Optional[str]):
    for func in ["save", "load", "info"]:
        setattr(paddleaudio, func, getattr(module, func))
 def _init_audio_backend():
    backends = list_audio_backends()
    if "soundfile" in backends:
--- a/audio/paddleaudio/utils/init.py
+++ b/audio/paddleaudio/utils/init.py
@ -21,7 +21,7 @@ from .env import USER_HOME
 from .error import ParameterError
 from .log import Logger
 from .log import logger
 from .time import seconds_to_hms
 from .time import Timer
 from .numeric import depth_convert
 from .numeric import pcm16to32
 from .time import seconds_to_hms
 from .time import Timer
--- a/audio/tests/backends/soundfile/common.py
+++ b/audio/tests/backends/soundfile/common.py
@ -1,8 +1,8 @@
 import itertools
 from unittest import skipIf
 from parameterized import parameterized
 from paddleaudio._internal.module_utils import is_module_available
 from parameterized import parameterized
 def name_func(func, _, params):
@ -31,7 +31,8 @@ def skipIfFormatNotSupported(fmt):
 def parameterize(*params):
-    return parameterized.expand(list(itertools.product(*params)), name_func=name_func)
+    return parameterized.expand(
        list(itertools.product(*params)), name_func=name_func)
 def fetch_wav_subtype(dtype, encoding, bits_per_sample):
@ -54,4 +55,3 @@ def fetch_wav_subtype(dtype, encoding, bits_per_sample):
    if subtype:
        return subtype
    raise ValueError(f"wav does not support ({encoding}, {bits_per_sample}).")
--- a/audio/tests/backends/soundfile/info_test.py
+++ b/audio/tests/backends/soundfile/info_test.py
@ -1,37 +1,37 @@
 #this code is from: https://github.com/pytorch/audio/blob/main/test/torchaudio_unittest/backend/soundfile/info_test.py
 import tarfile
 import warnings
 import unittest
 import warnings
 from unittest.mock import patch
 import paddle
-from paddleaudio._internal import module_utils as _mod_utils
+import soundfile
 from common import parameterize
 from common import skipIfFormatNotSupported
 from paddleaudio.backends import soundfile_backend
 from tests.backends.common import get_bits_per_sample, get_encoding 
 from tests.common_utils import (
    get_wav_data,
    nested_params,
    save_wav,
    TempDirMixin,
 )
-from common import parameterize, skipIfFormatNotSupported
+from tests.backends.common import get_bits_per_sample
-
+from tests.backends.common import get_encoding
-import soundfile
+from tests.common_utils import get_wav_data
 from tests.common_utils import nested_params
 from tests.common_utils import save_wav
 from tests.common_utils import TempDirMixin
 class TestInfo(TempDirMixin, unittest.TestCase):
    @parameterize(
        ["float32", "int32"],
        [8000, 16000],
-        [1, 2],
+        [1, 2], )
    )
    def test_wav(self, dtype, sample_rate, num_channels):
        """`soundfile_backend.info` can check wav file correctly"""
        duration = 1
        path = self.get_temp_path("data.wav")
-        data = get_wav_data(dtype, num_channels, normalize=False, num_frames=duration * sample_rate)
+        data = get_wav_data(
            dtype,
            num_channels,
            normalize=False,
            num_frames=duration * sample_rate)
        save_wav(path, data, sample_rate)
        info = soundfile_backend.info(path)
        assert info.sample_rate == sample_rate
@ -86,8 +86,7 @@ class TestInfo(TempDirMixin, unittest.TestCase):
    @nested_params(
        [8000, 16000],
        [1, 2],
-        [("PCM_24", 24), ("PCM_32", 32)],
+        [("PCM_24", 24), ("PCM_32", 32)], )
    )
    @skipIfFormatNotSupported("NIST")
    def test_sphere(self, sample_rate, num_channels, subtype_and_bit_depth):
        """`soundfile_backend.info` can check sph file correctly"""
@ -127,7 +126,8 @@ class TestInfo(TempDirMixin, unittest.TestCase):
            with warnings.catch_warnings(record=True) as w:
                info = soundfile_backend.info("foo")
                assert len(w) == 1
-                assert "UNSEEN_SUBTYPE subtype is unknown to PaddleAudio" in str(w[-1].message)
+                assert "UNSEEN_SUBTYPE subtype is unknown to PaddleAudio" in str(
                    w[-1].message)
                assert info.bits_per_sample == 0
@ -195,5 +195,6 @@ class TestFileObject(TempDirMixin, unittest.TestCase):
        """Query compressed audio via file-like object works"""
        self._test_tarobj("flac", "PCM_16", 16)
 if __name__ == '__main__':
    unittest.main()
--- a/audio/tests/backends/soundfile/load_test.py
+++ b/audio/tests/backends/soundfile/load_test.py
@ -1,28 +1,23 @@
 #this code is from: https://github.com/pytorch/audio/blob/main/test/torchaudio_unittest/backend/soundfile/load_test.py
 import os
 import tarfile
 import unittest
 from unittest.mock import patch
 import numpy as np
-from parameterized import parameterized
+import numpy as np
 import paddle
-from paddleaudio._internal import module_utils as _mod_utils
+import soundfile
 from common import dtype2subtype
 from common import parameterize
 from common import skipIfFormatNotSupported
 from paddleaudio.backends import soundfile_backend
-from tests.backends.common import get_bits_per_sample, get_encoding 
+from parameterized import parameterized
 from tests.common_utils import (
    get_wav_data,
    load_wav,
    nested_params,
    normalize_wav,
    save_wav,
    TempDirMixin,
 )
 from common import dtype2subtype, parameterize, skipIfFormatNotSupported
-import soundfile
+from tests.common_utils import get_wav_data
 from tests.common_utils import load_wav
 from tests.common_utils import normalize_wav
 from tests.common_utils import save_wav
 from tests.common_utils import TempDirMixin
 def _get_mock_path(
@ -30,8 +25,7 @@ def _get_mock_path(
        dtype: str,
        sample_rate: int,
        num_channels: int,
-    num_frames: int,
+        num_frames: int, ):
 ):
    return f"{dtype}_{sample_rate}_{num_channels}_{num_frames}.{ext}"
@ -87,9 +81,8 @@ class SoundFileMock:
            self._params["num_channels"],
            normalize=False,
            num_frames=self._params["num_frames"],
-            channels_first=False,
+            channels_first=False, ).numpy()
-        ).numpy()
+        return data[self._start:self._start + frames]
        return data[self._start : self._start + frames]
    def __enter__(self):
        return self
@ -99,13 +92,17 @@ class SoundFileMock:
 class MockedLoadTest(unittest.TestCase):
-    def assert_dtype(self, ext, dtype, sample_rate, num_channels, normalize, channels_first):
+    def assert_dtype(self, ext, dtype, sample_rate, num_channels, normalize,
                     channels_first):
        """When format is WAV or NIST, normalize=False will return the native dtype Tensor, otherwise float32"""
        num_frames = 3 * sample_rate
        path = _get_mock_path(ext, dtype, sample_rate, num_channels, num_frames)
-        expected_dtype = paddle.float32 if normalize or ext not in ["wav", "nist"] else getattr(paddle, dtype)
+        expected_dtype = paddle.float32 if normalize or ext not in [
            "wav", "nist"
        ] else getattr(paddle, dtype)
        with patch("soundfile.SoundFile", SoundFileMock):
-            found, sr = soundfile_backend.load(path, normalize=normalize, channels_first=channels_first)
+            found, sr = soundfile_backend.load(
                path, normalize=normalize, channels_first=channels_first)
            assert found.dtype == expected_dtype
            assert sample_rate == sr
@ -114,32 +111,36 @@ class MockedLoadTest(unittest.TestCase):
        [8000, 16000],
        [1, 2],
        [True, False],
-        [True, False],
+        [True, False], )
-    )
+    def test_wav(self, dtype, sample_rate, num_channels, normalize,
-    def test_wav(self, dtype, sample_rate, num_channels, normalize, channels_first):
+                 channels_first):
        """Returns native dtype when normalize=False else float32"""
-        self.assert_dtype("wav", dtype, sample_rate, num_channels, normalize, channels_first)
+        self.assert_dtype("wav", dtype, sample_rate, num_channels, normalize,
                          channels_first)
    @parameterize(
        ["int32"],
        [8000, 16000],
        [1, 2],
        [True, False],
-        [True, False],
+        [True, False], )
-    )
+    def test_sphere(self, dtype, sample_rate, num_channels, normalize,
-    def test_sphere(self, dtype, sample_rate, num_channels, normalize, channels_first):
+                    channels_first):
        """Returns float32 always"""
-        self.assert_dtype("sph", dtype, sample_rate, num_channels, normalize, channels_first)
+        self.assert_dtype("sph", dtype, sample_rate, num_channels, normalize,
                          channels_first)
    @parameterize([8000, 16000], [1, 2], [True, False], [True, False])
    def test_ogg(self, sample_rate, num_channels, normalize, channels_first):
        """Returns float32 always"""
-        self.assert_dtype("ogg", "int16", sample_rate, num_channels, normalize, channels_first)
+        self.assert_dtype("ogg", "int16", sample_rate, num_channels, normalize,
                          channels_first)
    @parameterize([8000, 16000], [1, 2], [True, False], [True, False])
    def test_flac(self, sample_rate, num_channels, normalize, channels_first):
        """`soundfile_backend.load` can load ogg format."""
-        self.assert_dtype("flac", "int16", sample_rate, num_channels, normalize, channels_first)
+        self.assert_dtype("flac", "int16", sample_rate, num_channels, normalize,
                          channels_first)
 class LoadTestBase(TempDirMixin, unittest.TestCase):
@ -150,8 +151,7 @@ class LoadTestBase(TempDirMixin, unittest.TestCase):
            num_channels,
            normalize,
            channels_first=True,
-        duration=1,
+            duration=1, ):
    ):
        """`soundfile_backend.load` can load wav format correctly.
        Wav data loaded with soundfile backend should match those with scipy
@ -163,11 +163,12 @@ class LoadTestBase(TempDirMixin, unittest.TestCase):
            num_channels,
            normalize=normalize,
            num_frames=num_frames,
-            channels_first=channels_first,
+            channels_first=channels_first, )
        )
        save_wav(path, data, sample_rate, channels_first=channels_first)
-        expected = load_wav(path, normalize=normalize, channels_first=channels_first)[0]
+        expected = load_wav(
-        data, sr = soundfile_backend.load(path, normalize=normalize, channels_first=channels_first)
+            path, normalize=normalize, channels_first=channels_first)[0]
        data, sr = soundfile_backend.load(
            path, normalize=normalize, channels_first=channels_first)
        assert sr == sample_rate
        np.testing.assert_array_almost_equal(data.numpy(), expected.numpy())
@ -177,8 +178,7 @@ class LoadTestBase(TempDirMixin, unittest.TestCase):
            sample_rate,
            num_channels,
            channels_first=True,
-        duration=1,
+            duration=1, ):
    ):
        """`soundfile_backend.load` can load SPHERE format correctly."""
        path = self.get_temp_path("reference.sph")
        num_frames = duration * sample_rate
@ -187,9 +187,9 @@ class LoadTestBase(TempDirMixin, unittest.TestCase):
            num_channels,
            num_frames=num_frames,
            normalize=False,
-            channels_first=False,
+            channels_first=False, )
-        )
+        soundfile.write(
-        soundfile.write(path, raw, sample_rate, subtype=dtype2subtype(dtype), format="NIST")
+            path, raw, sample_rate, subtype=dtype2subtype(dtype), format="NIST")
        expected = normalize_wav(raw.t() if channels_first else raw)
        data, sr = soundfile_backend.load(path, channels_first=channels_first)
        assert sr == sample_rate
@ -202,8 +202,7 @@ class LoadTestBase(TempDirMixin, unittest.TestCase):
            sample_rate,
            num_channels,
            channels_first=True,
-        duration=1,
+            duration=1, ):
    ):
        """`soundfile_backend.load` can load FLAC format correctly."""
        path = self.get_temp_path("reference.flac")
        num_frames = duration * sample_rate
@ -212,8 +211,7 @@ class LoadTestBase(TempDirMixin, unittest.TestCase):
            num_channels,
            num_frames=num_frames,
            normalize=False,
-            channels_first=False,
+            channels_first=False, )
        )
        soundfile.write(path, raw, sample_rate)
        expected = normalize_wav(raw.t() if channels_first else raw)
        data, sr = soundfile_backend.load(path, channels_first=channels_first)
@ -222,7 +220,6 @@ class LoadTestBase(TempDirMixin, unittest.TestCase):
        np.testing.assert_array_almost_equal(data.numpy(), expected.numpy())
 class TestLoad(LoadTestBase):
    """Test the correctness of `soundfile_backend.load` for various formats"""
@ -231,29 +228,31 @@ class TestLoad(LoadTestBase):
        [8000, 16000],
        [1, 2],
        [False, True],
-        [False, True],
+        [False, True], )
-    )
+    def test_wav(self, dtype, sample_rate, num_channels, normalize,
-    def test_wav(self, dtype, sample_rate, num_channels, normalize, channels_first):
+                 channels_first):
        """`soundfile_backend.load` can load wav format correctly."""
-        self.assert_wav(dtype, sample_rate, num_channels, normalize, channels_first)
+        self.assert_wav(dtype, sample_rate, num_channels, normalize,
                        channels_first)
    @parameterize(
        ["int32"],
        [16000],
        [2],
-        [False],
+        [False], )
    )
    def test_wav_large(self, dtype, sample_rate, num_channels, normalize):
        """`soundfile_backend.load` can load large wav file correctly."""
        two_hours = 2 * 60 * 60
-        self.assert_wav(dtype, sample_rate, num_channels, normalize, duration=two_hours)
+        self.assert_wav(
            dtype, sample_rate, num_channels, normalize, duration=two_hours)
    @parameterize(["float32", "int32"], [4, 8, 16, 32], [False, True])
    def test_multiple_channels(self, dtype, num_channels, channels_first):
        """`soundfile_backend.load` can load wav file with more than 2 channels."""
        sample_rate = 8000
        normalize = False
-        self.assert_wav(dtype, sample_rate, num_channels, normalize, channels_first)
+        self.assert_wav(dtype, sample_rate, num_channels, normalize,
                        channels_first)
    #@parameterize(["int32"], [8000, 16000], [1, 2], [False, True])
    #@skipIfFormatNotSupported("NIST")
@ -291,21 +290,17 @@ class TestLoadFormat(TempDirMixin, unittest.TestCase):
        #self.assertEqual(found, expected)
        np.testing.assert_array_almost_equal(found, expected)
-    @parameterized.expand(
+    @parameterized.expand([
-        [
+        ("WAV", ),
-            ("WAV",),
+        ("wav", ),
-            ("wav",),
+    ])
        ]
    )
    def test_wav(self, format_):
        self._test_format(format_)
-    @parameterized.expand(
+    @parameterized.expand([
-        [
+        ("FLAC", ),
-            ("FLAC",),
+        ("flac", ),
-            ("flac",),
+    ])
        ]
    )
    @skipIfFormatNotSupported("FLAC")
    def test_flac(self, format_):
        self._test_format(format_)
@ -356,7 +351,6 @@ class TestFileObject(TempDirMixin, unittest.TestCase):
        #self.assertEqual(expected, found)
        np.testing.assert_array_almost_equal(found.numpy(), expected)
    def test_tarfile_wav(self):
        """Loading audio via file-like object works"""
        self._test_tarfile("wav")
@ -365,5 +359,6 @@ class TestFileObject(TempDirMixin, unittest.TestCase):
        """Loading audio via file-like object works"""
        self._test_tarfile("flac")
 if __name__ == '__main__':
    unittest.main()
--- a/audio/tests/backends/soundfile/save_test.py
+++ b/audio/tests/backends/soundfile/save_test.py
@ -2,23 +2,18 @@ import io
 import unittest
 from unittest.mock import patch
 from paddleaudio._internal import module_utils as _mod_utils
 from paddleaudio.backends import soundfile_backend
 from tests.common_utils import (
    get_wav_data,
    load_wav,
    nested_params,
    normalize_wav,
    save_wav,
    TempDirMixin,
 )
 from common import fetch_wav_subtype, parameterize, skipIfFormatNotSupported
 import paddle
 import numpy as np
-
+import paddle
 import soundfile
 from common import fetch_wav_subtype
 from common import parameterize
 from common import skipIfFormatNotSupported
 from paddleaudio.backends import soundfile_backend
 from tests.common_utils import get_wav_data
 from tests.common_utils import load_wav
 from tests.common_utils import nested_params
 from tests.common_utils import TempDirMixin
 class MockedSaveTest(unittest.TestCase):
@ -41,10 +36,10 @@ class MockedSaveTest(unittest.TestCase):
            ("ULAW", 8),
            ("ALAW", None),
            ("ALAW", 8),
-        ],
+        ], )
    )
    @patch("soundfile.write")
-    def test_wav(self, dtype, sample_rate, num_channels, channels_first, enc_params, mocked_write):
+    def test_wav(self, dtype, sample_rate, num_channels, channels_first,
                 enc_params, mocked_write):
        """soundfile_backend.save passes correct subtype to soundfile.write when WAV"""
        filepath = "foo.wav"
        input_tensor = get_wav_data(
@ -52,8 +47,7 @@ class MockedSaveTest(unittest.TestCase):
            num_channels,
            num_frames=3 * sample_rate,
            normalize=dtype == "float32",
-            channels_first=channels_first,
+            channels_first=channels_first, )
        )
        input_tensor = paddle.transpose(input_tensor, [1, 0])
        encoding, bits_per_sample = enc_params
@ -63,20 +57,20 @@ class MockedSaveTest(unittest.TestCase):
            sample_rate,
            channels_first=channels_first,
            encoding=encoding,
-            bits_per_sample=bits_per_sample,
+            bits_per_sample=bits_per_sample, )
        )
        # on +Py3.8 call_args.kwargs is more descreptive
        args = mocked_write.call_args[1]
        assert args["file"] == filepath
        assert args["samplerate"] == sample_rate
-        assert args["subtype"] == fetch_wav_subtype(dtype, encoding, bits_per_sample)
+        assert args["subtype"] == fetch_wav_subtype(dtype, encoding,
                                                    bits_per_sample)
        assert args["format"] is None
-        tensor_result = paddle.transpose(input_tensor, [1, 0]) if channels_first else input_tensor
+        tensor_result = paddle.transpose(
            input_tensor, [1, 0]) if channels_first else input_tensor
        #self.assertEqual(args["data"], tensor_result.numpy())
-        np.testing.assert_array_almost_equal(args["data"].numpy(), tensor_result.numpy())
+        np.testing.assert_array_almost_equal(args["data"].numpy(),
-
+                                             tensor_result.numpy())
    @patch("soundfile.write")
    def assert_non_wav(
@ -88,8 +82,7 @@ class MockedSaveTest(unittest.TestCase):
            channels_first,
            mocked_write,
            encoding=None,
-        bits_per_sample=None,
+            bits_per_sample=None, ):
    ):
        """soundfile_backend.save passes correct subtype and format to soundfile.write when SPHERE"""
        filepath = f"foo.{fmt}"
        input_tensor = get_wav_data(
@ -97,11 +90,11 @@ class MockedSaveTest(unittest.TestCase):
            num_channels,
            num_frames=3 * sample_rate,
            normalize=False,
-            channels_first=channels_first,
+            channels_first=channels_first, )
        )
        input_tensor = paddle.transpose(input_tensor, [1, 0])
-        expected_data = paddle.transpose(input_tensor, [1, 0]) if channels_first else input_tensor
+        expected_data = paddle.transpose(
            input_tensor, [1, 0]) if channels_first else input_tensor
        soundfile_backend.save(
            filepath,
@ -109,8 +102,7 @@ class MockedSaveTest(unittest.TestCase):
            sample_rate,
            channels_first,
            encoding=encoding,
-            bits_per_sample=bits_per_sample,
+            bits_per_sample=bits_per_sample, )
        )
        # on +Py3.8 call_args.kwargs is more descreptive
        args = mocked_write.call_args[1]
@ -120,7 +112,8 @@ class MockedSaveTest(unittest.TestCase):
            assert args["format"] == "NIST"
        else:
            assert args["format"] is None
-        np.testing.assert_array_almost_equal(args["data"].numpy(), expected_data.numpy())
+        np.testing.assert_array_almost_equal(args["data"].numpy(),
                                             expected_data.numpy())
        #self.assertEqual(args["data"], expected_data)
    @nested_params(
@ -139,45 +132,57 @@ class MockedSaveTest(unittest.TestCase):
            ("ALAW", 16),
            ("ALAW", 24),
            ("ALAW", 32),
-        ],
+        ], )
-    )
+    def test_sph(self, fmt, dtype, sample_rate, num_channels, channels_first,
-    def test_sph(self, fmt, dtype, sample_rate, num_channels, channels_first, enc_params):
+                 enc_params):
        """soundfile_backend.save passes default format and subtype (None-s) to
        soundfile.write when not WAV"""
        encoding, bits_per_sample = enc_params
        self.assert_non_wav(
-            fmt, dtype, sample_rate, num_channels, channels_first, encoding=encoding, bits_per_sample=bits_per_sample
+            fmt,
-        )
+            dtype,
            sample_rate,
            num_channels,
            channels_first,
            encoding=encoding,
            bits_per_sample=bits_per_sample)
    @parameterize(
        ["int32"],
        [8000, 16000],
        [1, 2],
        [False, True],
-        [8, 16, 24],
+        [8, 16, 24], )
-    )
+    def test_flac(self, dtype, sample_rate, num_channels, channels_first,
-    def test_flac(self, dtype, sample_rate, num_channels, channels_first, bits_per_sample):
+                  bits_per_sample):
        """soundfile_backend.save passes default format and subtype (None-s) to
        soundfile.write when not WAV"""
-        self.assert_non_wav("flac", dtype, sample_rate, num_channels, channels_first, bits_per_sample=bits_per_sample)
+        self.assert_non_wav(
            "flac",
            dtype,
            sample_rate,
            num_channels,
            channels_first,
            bits_per_sample=bits_per_sample)
    @parameterize(
        ["int32"],
        [8000, 16000],
        [1, 2],
-        [False, True],
+        [False, True], )
    )
    def test_ogg(self, dtype, sample_rate, num_channels, channels_first):
        """soundfile_backend.save passes default format and subtype (None-s) to
        soundfile.write when not WAV"""
-        self.assert_non_wav("ogg", dtype, sample_rate, num_channels, channels_first)
+        self.assert_non_wav("ogg", dtype, sample_rate, num_channels,
                            channels_first)
 class SaveTestBase(TempDirMixin, unittest.TestCase):
    def assert_wav(self, dtype, sample_rate, num_channels, num_frames):
        """`soundfile_backend.save` can save wav format."""
        path = self.get_temp_path("data.wav")
-        expected = get_wav_data(dtype, num_channels, num_frames=num_frames, normalize=False)
+        expected = get_wav_data(
            dtype, num_channels, num_frames=num_frames, normalize=False)
        soundfile_backend.save(path, expected, sample_rate)
        found, sr = load_wav(path, normalize=False)
        assert sample_rate == sr
@ -192,7 +197,8 @@ class SaveTestBase(TempDirMixin, unittest.TestCase):
        """
        num_frames = sample_rate * 3
        path = self.get_temp_path(f"data.{fmt}")
-        expected = get_wav_data(dtype, num_channels, num_frames=num_frames, normalize=False)
+        expected = get_wav_data(
            dtype, num_channels, num_frames=num_frames, normalize=False)
        soundfile_backend.save(path, expected, sample_rate)
        sinfo = soundfile.info(path)
        assert sinfo.format == fmt.upper()
@ -220,16 +226,14 @@ class TestSave(SaveTestBase):
    @parameterize(
        ["float32", "int32"],
        [8000, 16000],
-        [1, 2],
+        [1, 2], )
    )
    def test_wav(self, dtype, sample_rate, num_channels):
        """`soundfile_backend.save` can save wav format."""
        self.assert_wav(dtype, sample_rate, num_channels, num_frames=None)
    @parameterize(
        ["float32", "int32"],
-        [4, 8, 16, 32],
+        [4, 8, 16, 32], )
    )
    def test_multiple_channels(self, dtype, num_channels):
        """`soundfile_backend.save` can save wav with more than 2 channels."""
        sample_rate = 8000
@ -238,8 +242,7 @@ class TestSave(SaveTestBase):
    @parameterize(
        ["int32"],
        [8000, 16000],
-        [1, 2],
+        [1, 2], )
    )
    @skipIfFormatNotSupported("NIST")
    def test_sphere(self, dtype, sample_rate, num_channels):
        """`soundfile_backend.save` can save sph format."""
@ -247,8 +250,7 @@ class TestSave(SaveTestBase):
    @parameterize(
        [8000, 16000],
-        [1, 2],
+        [1, 2], )
    )
    @skipIfFormatNotSupported("FLAC")
    def test_flac(self, sample_rate, num_channels):
        """`soundfile_backend.save` can save flac format."""
@ -256,8 +258,7 @@ class TestSave(SaveTestBase):
    @parameterize(
        [8000, 16000],
-        [1, 2],
+        [1, 2], )
    )
    @skipIfFormatNotSupported("OGG")
    def test_ogg(self, sample_rate, num_channels):
        """`soundfile_backend.save` can save ogg/vorbis format."""
@ -318,5 +319,6 @@ class TestFileObject(TempDirMixin, unittest.TestCase):
        """Saving audio via file-like object works"""
        self._test_fileobj("OGG")
 if __name__ == '__main__':
    unittest.main()
--- a/audio/tests/common_utils/init.py
+++ b/audio/tests/common_utils/init.py
@ -1,17 +1,12 @@
-from .wav_utils import get_wav_data, load_wav, save_wav, normalize_wav
+from .case_utils import name_func
 from .case_utils import TempDirMixin
 from .parameterized_utils import nested_params
-from .case_utils import (
+from .wav_utils import get_wav_data
-    TempDirMixin,
+from .wav_utils import load_wav
-    name_func
+from .wav_utils import normalize_wav
-)
+from .wav_utils import save_wav
 __all__ = [
-    "get_wav_data",
+    "get_wav_data", "load_wav", "save_wav", "normalize_wav", "get_sinusoid",
-    "load_wav",
+    "name_func", "nested_params", "TempDirMixin"
    "save_wav",
    "normalize_wav",
    "get_sinusoid",
    "name_func",
    "nested_params",
    "TempDirMixin"
 ]
--- a/audio/tests/common_utils/wav_utils.py
+++ b/audio/tests/common_utils/wav_utils.py
@ -1,8 +1,8 @@
 from typing import Optional
 import scipy.io.wavfile
 import paddle
-import numpy as np
+import scipy.io.wavfile
 def normalize_wav(tensor: paddle.Tensor) -> paddle.Tensor:
    if tensor.dtype == paddle.float32:
@ -26,10 +26,9 @@ def get_wav_data(
        dtype: str,
        num_channels: int,
        *,
-    num_frames: Optional[int] = None,
+        num_frames: Optional[int]=None,
-    normalize: bool = True,
+        normalize: bool=True,
-    channels_first: bool = True,
+        channels_first: bool=True, ):
 ):
    """Generate linear signal of the given dtype and num_channels
    Data range is
@ -66,7 +65,8 @@ def get_wav_data(
    elif dtype == "float64":
        base = paddle.linspace(-1.0, 1.0, num_frames, dtype=dtype_)
    elif dtype == "int32":
-        base = paddle.linspace(-2147483648, 2147483647, num_frames, dtype=dtype_)
+        base = paddle.linspace(
            -2147483648, 2147483647, num_frames, dtype=dtype_)
    #elif dtype == "int16":
    #    base = paddle.linspace(-32768, 32767, num_frames, dtype=dtype_)
    #dtype_np = getattr(np, dtype)
--- a/audio/docs/source/source/paddleaudio.backends.common.rst
+++ b/audio/docs/source/source/paddleaudio.backends.common.rst
--- a/audio/docs/source/source/paddleaudio.backends.no_backend.rst
+++ b/audio/docs/source/source/paddleaudio.backends.no_backend.rst
--- a/audio/docs/source/source/paddleaudio.backends.rst
+++ b/audio/docs/source/source/paddleaudio.backends.rst
--- a/audio/docs/source/source/paddleaudio.backends.soundfile_backend.rst
+++ b/audio/docs/source/source/paddleaudio.backends.soundfile_backend.rst
--- a/audio/docs/source/source/paddleaudio.backends.sox_io_backend.rst
+++ b/audio/docs/source/source/paddleaudio.backends.sox_io_backend.rst
--- a/audio/docs/source/source/paddleaudio.backends.utils.rst
+++ b/audio/docs/source/source/paddleaudio.backends.utils.rst
--- a/audio/docs/source/source/paddleaudio.compliance.kaldi.rst
+++ b/audio/docs/source/source/paddleaudio.compliance.kaldi.rst
--- a/audio/docs/source/source/paddleaudio.compliance.librosa.rst
+++ b/audio/docs/source/source/paddleaudio.compliance.librosa.rst
--- a/audio/docs/source/source/paddleaudio.compliance.rst
+++ b/audio/docs/source/source/paddleaudio.compliance.rst
--- a/audio/docs/source/source/paddleaudio.datasets.dataset.rst
+++ b/audio/docs/source/source/paddleaudio.datasets.dataset.rst
--- a/audio/docs/source/source/paddleaudio.datasets.esc50.rst
+++ b/audio/docs/source/source/paddleaudio.datasets.esc50.rst
--- a/audio/docs/source/source/paddleaudio.datasets.gtzan.rst
+++ b/audio/docs/source/source/paddleaudio.datasets.gtzan.rst
--- a/audio/docs/source/source/paddleaudio.datasets.hey_snips.rst
+++ b/audio/docs/source/source/paddleaudio.datasets.hey_snips.rst
--- a/audio/docs/source/source/paddleaudio.datasets.rirs_noises.rst
+++ b/audio/docs/source/source/paddleaudio.datasets.rirs_noises.rst
--- a/audio/docs/source/source/paddleaudio.datasets.rst
+++ b/audio/docs/source/source/paddleaudio.datasets.rst
--- a/audio/docs/source/source/paddleaudio.datasets.tess.rst
+++ b/audio/docs/source/source/paddleaudio.datasets.tess.rst
--- a/audio/docs/source/source/paddleaudio.datasets.urban_sound.rst
+++ b/audio/docs/source/source/paddleaudio.datasets.urban_sound.rst
--- a/audio/docs/source/source/paddleaudio.datasets.voxceleb.rst
+++ b/audio/docs/source/source/paddleaudio.datasets.voxceleb.rst
--- a/audio/docs/source/source/paddleaudio.features.layers.rst
+++ b/audio/docs/source/source/paddleaudio.features.layers.rst
--- a/audio/docs/source/source/paddleaudio.features.rst
+++ b/audio/docs/source/source/paddleaudio.features.rst
--- a/audio/docs/source/source/paddleaudio.functional.functional.rst
+++ b/audio/docs/source/source/paddleaudio.functional.functional.rst
--- a/audio/docs/source/source/paddleaudio.functional.rst
+++ b/audio/docs/source/source/paddleaudio.functional.rst
--- a/audio/docs/source/source/paddleaudio.functional.window.rst
+++ b/audio/docs/source/source/paddleaudio.functional.window.rst
--- a/audio/docs/source/source/paddleaudio.io.rst
+++ b/audio/docs/source/source/paddleaudio.io.rst
--- a/audio/docs/source/source/paddleaudio.metric.eer.rst
+++ b/audio/docs/source/source/paddleaudio.metric.eer.rst
--- a/audio/docs/source/source/paddleaudio.metric.rst
+++ b/audio/docs/source/source/paddleaudio.metric.rst
--- a/audio/docs/source/source/paddleaudio.rst
+++ b/audio/docs/source/source/paddleaudio.rst
--- a/audio/docs/source/source/paddleaudio.sox_effects.rst
+++ b/audio/docs/source/source/paddleaudio.sox_effects.rst
--- a/examples/voxceleb/sv0/local/data_prepare.py
+++ b/examples/voxceleb/sv0/local/data_prepare.py
@ -14,9 +14,9 @@
 import argparse
 import paddle
 from paddleaudio.datasets.voxceleb import VoxCeleb
 from yacs.config import CfgNode
 from paddleaudio.datasets.voxceleb import VoxCeleb
 from paddlespeech.s2t.utils.log import Log
 from paddlespeech.vector.io.augment import build_augment_pipeline
 from paddlespeech.vector.training.seeding import seed_everything
--- a/examples/voxceleb/sv0/local/make_rirs_noise_csv_dataset_from_json.py
+++ b/examples/voxceleb/sv0/local/make_rirs_noise_csv_dataset_from_json.py
@ -21,9 +21,9 @@ import os
 from typing import List
 import tqdm
 from paddleaudio.backends import soundfile_load as load_audio
 from yacs.config import CfgNode
 from paddleaudio.backends import soundfile_load as load_audio
 from paddlespeech.s2t.utils.log import Log
 from paddlespeech.vector.utils.vector_utils import get_chunks
--- a/examples/voxceleb/sv0/local/make_vox_csv_dataset_from_json.py
+++ b/examples/voxceleb/sv0/local/make_vox_csv_dataset_from_json.py
@ -22,9 +22,9 @@ import os
 import random
 import tqdm
 from paddleaudio.backends import soundfile_load as load_audio
 from yacs.config import CfgNode
 from paddleaudio.backends import soundfile_load as load_audio
 from paddlespeech.s2t.utils.log import Log
 from paddlespeech.vector.utils.vector_utils import get_chunks
--- a/paddlespeech/audio/init.py
+++ b/paddlespeech/audio/init.py
@ -11,17 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from . import _extension
 from . import compliance
 from . import datasets
 from . import features
 from . import functional
 from . import io
 from . import metric
 from . import sox_effects
 from . import streamdata
 from . import text
 from . import transform
 from .backends import load
 from .backends import save
--- a/paddlespeech/audio/_extension.py
+++ b/paddlespeech/audio/_extension.py
@ -1,15 +1,12 @@
 import os
 import warnings
 from pathlib import Path
 from ._internal import module_utils as _mod_utils  # noqa: F401
 import contextlib
 import ctypes
 import os
 import sys
 import types
 import warnings
 from pathlib import Path
 from ._internal import module_utils as _mod_utils  # noqa: F401
 # Query `hasattr` only once.
 _SET_GLOBAL_FLAGS = hasattr(sys, 'getdlopenflags') and hasattr(sys,
@ -68,6 +65,7 @@ class _Ops(types.ModuleType):
 _LIB_DIR = Path(__file__).parent / "lib"
 def _get_lib_path(lib: str):
    suffix = "pyd" if os.name == "nt" else "so"
    path = _LIB_DIR / f"{lib}.{suffix}"
--- a/paddlespeech/audio/backends/sox_io_backend.py
+++ b/paddlespeech/audio/backends/sox_io_backend.py
@ -1,17 +1,17 @@
-from pathlib import Path
+import os
-from typing import Callable
+from typing import Optional
-from typing import Optional, Tuple, Union
+from typing import Tuple
 import paddle
 from paddle import Tensor
 from .common import AudioMetaData
 import os
-from paddlespeech.audio._internal import module_utils  as _mod_utils
+from .common import AudioMetaData
 from paddlespeech.audio import _paddleaudio as paddleaudio
 from paddlespeech.audio._internal import module_utils as _mod_utils
 #https://github.com/pytorch/audio/blob/main/torchaudio/backend/sox_io_backend.py
 def _fail_info(filepath: str, format: Optional[str]) -> AudioMetaData:
    raise RuntimeError("Failed to fetch metadata from {}".format(filepath))
@ -23,72 +23,76 @@ def _fail_info_fileobj(fileobj, format: Optional[str]) -> AudioMetaData:
 # Note: need to comply TorchScript syntax -- need annotation and no f-string
 def _fail_load(
        filepath: str,
-    frame_offset: int = 0,
+        frame_offset: int=0,
-    num_frames: int = -1,
+        num_frames: int=-1,
-    normalize: bool = True,
+        normalize: bool=True,
-    channels_first: bool = True,
+        channels_first: bool=True,
-    format: Optional[str] = None,
+        format: Optional[str]=None, ) -> Tuple[Tensor, int]:
 ) -> Tuple[Tensor, int]:
    raise RuntimeError("Failed to load audio from {}".format(filepath))
 def _fail_load_fileobj(fileobj, *args, **kwargs):
    raise RuntimeError(f"Failed to load audio from {fileobj}")
 _fallback_info = _fail_info
 _fallback_info_fileobj = _fail_info_fileobj
 _fallback_load = _fail_load
 _fallback_load_filebj = _fail_load_fileobj
@_mod_utils.requires_sox()
 def load(
        filepath: str,
-        frame_offset: int = 0,
+        frame_offset: int=0,
        num_frames: int=-1,
-        normalize: bool = True,
+        normalize: bool=True,
-        channels_first: bool = True,
+        channels_first: bool=True,
        format: Optional[str]=None, ) -> Tuple[Tensor, int]:
    if hasattr(filepath, "read"):
-        ret = paddleaudio.load_audio_fileobj(
+        ret = paddleaudio.load_audio_fileobj(filepath, frame_offset, num_frames,
-            filepath, frame_offset, num_frames, normalize, channels_first, format
+                                             normalize, channels_first, format)
        )
        if ret is not None:
            audio_tensor = paddle.to_tensor(ret[0])
            return (audio_tensor, ret[1])
-        return _fallback_load_fileobj(filepath, frame_offset, num_frames, normalize, channels_first, format)
+        return _fallback_load_fileobj(filepath, frame_offset, num_frames,
                                      normalize, channels_first, format)
    filepath = os.fspath(filepath)
-    ret = paddleaudio.sox_io_load_audio_file(
+    ret = paddleaudio.sox_io_load_audio_file(filepath, frame_offset, num_frames,
-        filepath, frame_offset, num_frames, normalize, channels_first, format
+                                             normalize, channels_first, format)
    )
    if ret is not None:
        audio_tensor = paddle.to_tensor(ret[0])
        return (audio_tensor, ret[1])
-    return _fallback_load(filepath, frame_offset, num_frames, normalize, channels_first, format)
+    return _fallback_load(filepath, frame_offset, num_frames, normalize,
                          channels_first, format)
@_mod_utils.requires_sox()
-def save(filepath: str,
+def save(
        filepath: str,
        src: Tensor,
        sample_rate: int,
-    channels_first: bool = True,
+        channels_first: bool=True,
-    compression: Optional[float] = None,
+        compression: Optional[float]=None,
-    format: Optional[str] = None,
+        format: Optional[str]=None,
-    encoding: Optional[str] = None,
+        encoding: Optional[str]=None,
-    bits_per_sample: Optional[int] = None,
+        bits_per_sample: Optional[int]=None, ):
 ):
    src_arr = src.numpy()
    if hasattr(filepath, "write"):
-        paddleaudio.save_audio_fileobj(
+        paddleaudio.save_audio_fileobj(filepath, src_arr, sample_rate,
-            filepath, src_arr, sample_rate, channels_first, compression, format, encoding, bits_per_sample
+                                       channels_first, compression, format,
-        )
+                                       encoding, bits_per_sample)
        return
    filepath = os.fspath(filepath)
-    paddleaudio.sox_io_save_audio_file(
+    paddleaudio.sox_io_save_audio_file(filepath, src_arr, sample_rate,
-        filepath, src_arr, sample_rate, channels_first, compression, format, encoding, bits_per_sample
+                                       channels_first, compression, format,
-    )
+                                       encoding, bits_per_sample)
@_mod_utils.requires_sox()
-def info(filepath: str, format: Optional[str] = None,) -> AudioMetaData:
+def info(
        filepath: str,
        format: Optional[str]=None, ) -> AudioMetaData:
    if hasattr(filepath, "read"):
        sinfo = paddleaudio.get_info_fileobj(filepath, format)
        if sinfo is not None:
--- a/paddlespeech/audio/backends/utils.py
+++ b/paddlespeech/audio/backends/utils.py
@ -1,15 +1,15 @@
 """Defines utilities for switching audio backends"""
 #code is from: https://github.com/pytorch/audio/blob/main/torchaudio/backend/utils.py
 import warnings
 from typing import List
 from typing import Optional
 import paddlespeech.audio
 from . import no_backend
 from . import soundfile_backend
 from . import sox_io_backend
 from paddlespeech.audio._internal import module_utils as _mod_utils
 from . import no_backend, soundfile_backend, sox_io_backend
 __all__ = [
    "list_audio_backends",
    "get_audio_backend",
--- a/paddlespeech/audio/sox_effects/init.py
+++ b/paddlespeech/audio/sox_effects/init.py
@ -1,14 +1,10 @@
 from .sox_effects import apply_effects_file
 from .sox_effects import apply_effects_tensor
 from .sox_effects import effect_names
 from .sox_effects import init_sox_effects
 from .sox_effects import shutdown_sox_effects
 from paddlespeech.audio._internal import module_utils as _mod_utils
 from .sox_effects import (
    apply_effects_file,
    apply_effects_tensor,
    effect_names,
    init_sox_effects,
    shutdown_sox_effects,
 )
 if _mod_utils.is_sox_available():
    import atexit
@ -22,4 +18,3 @@ __all__ = [
    "apply_effects_tensor",
    "apply_effects_file",
 ]
--- a/paddlespeech/audio/sox_effects/sox_effects.py
+++ b/paddlespeech/audio/sox_effects/sox_effects.py
@ -1,14 +1,17 @@
 import os
-from typing import List, Optional, Tuple
+from typing import List
 from typing import Optional
 from typing import Tuple
 import paddle
 import numpy
 from paddlespeech.audio import _paddleaudio as paddleaudio
 from paddlespeech.audio._internal import module_utils as _mod_utils
 from paddlespeech.audio.utils.sox_utils import list_effects
 from paddlespeech.audio import _paddleaudio as paddleaudio
 #code is from: https://github.com/pytorch/audio/blob/main/torchaudio/sox_effects/sox_effects.py
@_mod_utils.requires_sox()
 def init_sox_effects():
    """Initialize resources required to use sox effects.
@ -57,8 +60,7 @@ def apply_effects_tensor(
        tensor: paddle.Tensor,
        sample_rate: int,
        effects: List[List[str]],
-    channels_first: bool = True,
+        channels_first: bool=True, ) -> Tuple[paddle.Tensor, int]:
 ) -> Tuple[paddle.Tensor, int]:
    """Apply sox effects to given Tensor
    .. devices:: CPU
@ -120,7 +122,8 @@ def apply_effects_tensor(
    """
    tensor_np = tensor.numpy()
-    ret = paddleaudio.sox_effects_apply_effects_tensor(tensor_np, sample_rate, effects, channels_first)
+    ret = paddleaudio.sox_effects_apply_effects_tensor(tensor_np, sample_rate,
                                                       effects, channels_first)
    if ret is not None:
        return (paddle.to_tensor(ret[0]), ret[1])
    raise RuntimeError("Failed to apply sox effect")
@ -130,10 +133,9 @@ def apply_effects_tensor(
 def apply_effects_file(
        path: str,
        effects: List[List[str]],
-    normalize: bool = True,
+        normalize: bool=True,
-    channels_first: bool = True,
+        channels_first: bool=True,
-    format: Optional[str] = None,
+        format: Optional[str]=None, ) -> Tuple[paddle.Tensor, int]:
 ) -> Tuple[paddle.Tensor, int]:
    """Apply sox effects to the audio file and load the resulting data as Tensor
    Note:
@ -227,12 +229,14 @@ def apply_effects_file(
        >>>     pass
    """
    if hasattr(path, "read"):
-        ret = paddleaudio.apply_effects_fileobj(path, effects, normalize, channels_first, format)
+        ret = paddleaudio.apply_effects_fileobj(path, effects, normalize,
                                                channels_first, format)
        if ret is None:
            raise RuntimeError("Failed to load audio from {}".format(path))
        return (paddle.to_tensor(ret[0]), ret[1])
    path = os.fspath(path)
-    ret = paddleaudio.sox_effects_apply_effects_file(path, effects, normalize, channels_first, format)
+    ret = paddleaudio.sox_effects_apply_effects_file(path, effects, normalize,
                                                     channels_first, format)
    if ret is not None:
        return (paddle.to_tensor(ret[0]), ret[1])
    raise RuntimeError("Failed to load audio from {}".format(path))
--- a/paddlespeech/audio/utils/sox_utils.py
+++ b/paddlespeech/audio/utils/sox_utils.py
@ -1,7 +1,11 @@
-from typing import Dict, List
+from typing import Dict
 from typing import List
 from paddlespeech.audio._internal import module_utils as _mod_utils
 from paddlespeech.audio import _paddleaudio
 from paddlespeech.audio._internal import module_utils as _mod_utils
 #Taken form https://github.com/pytorch/audio/blob/main/torchaudio/utils/sox_utils.py with modification.
@_mod_utils.requires_sox()
 def set_seed(seed: int):
--- a/paddlespeech/cli/kws/infer.py
+++ b/paddlespeech/cli/kws/infer.py
@ -20,12 +20,12 @@ from typing import Union
 import paddle
 import yaml
 from paddleaudio.backends import soundfile_load as load_audio
 from paddleaudio.compliance.kaldi import fbank as kaldi_fbank
 from ..executor import BaseExecutor
 from ..log import logger
 from ..utils import stats_wrapper
 from paddleaudio.backends import soundfile_load as load_audio
 from paddleaudio.compliance.kaldi import fbank as kaldi_fbank
 __all__ = ['KWSExecutor']
@ -139,7 +139,7 @@ class KWSExecutor(BaseExecutor):
            Input content can be a text(tts), a file(asr, cls) or a streaming(not supported yet).
        """
        assert os.path.isfile(audio_file)
-        waveform, _ = load(audio_file)
+        waveform, _ = load_audio(audio_file)
        if isinstance(audio_file, (str, os.PathLike)):
            logger.debug("Preprocessing audio_file:" + audio_file)
--- a/paddlespeech/cli/vector/infer.py
+++ b/paddlespeech/cli/vector/infer.py
@ -22,13 +22,13 @@ from typing import Union
 import paddle
 import soundfile
 from paddleaudio.backends import soundfile_load as load_audio
 from paddleaudio.compliance.librosa import melspectrogram
 from yacs.config import CfgNode
 from ..executor import BaseExecutor
 from ..log import logger
 from ..utils import stats_wrapper
 from paddleaudio.backends import soundfile_load as load_audio
 from paddleaudio.compliance.librosa import melspectrogram
 from paddlespeech.vector.io.batch import feature_normalize
 from paddlespeech.vector.modules.sid_model import SpeakerIdetification
--- a/paddlespeech/cls/exps/panns/deploy/predict.py
+++ b/paddlespeech/cls/exps/panns/deploy/predict.py
@ -16,11 +16,10 @@ import os
 import numpy as np
 from paddle import inference
 from scipy.special import softmax
 from paddleaudio.backends import soundfile_load as load_audio
 from paddleaudio.datasets import ESC50
 from paddleaudio.features import melspectrogram
 from scipy.special import softmax
 # yapf: disable
 parser = argparse.ArgumentParser()
--- a/paddlespeech/cls/exps/panns/export_model.py
+++ b/paddlespeech/cls/exps/panns/export_model.py
@ -15,8 +15,8 @@ import argparse
 import os
 import paddle
 from paddleaudio.datasets import ESC50
 from paddlespeech.cls.models import cnn14
 from paddlespeech.cls.models import SoundClassifier
--- a/paddlespeech/cls/exps/panns/predict.py
+++ b/paddlespeech/cls/exps/panns/predict.py
@ -17,10 +17,10 @@ import os
 import paddle
 import paddle.nn.functional as F
 import yaml
 from paddleaudio.backends import soundfile_load as load_audio
 from paddleaudio.features import LogMelSpectrogram
 from paddleaudio.utils import logger
 from paddlespeech.cls.models import SoundClassifier
 from paddlespeech.utils.dynamic_import import dynamic_import
--- a/paddlespeech/cls/exps/panns/train.py
+++ b/paddlespeech/cls/exps/panns/train.py
@ -14,10 +14,10 @@
 import os
 import paddle
 from yacs.config import CfgNode
 from paddleaudio.utils import logger
 from paddleaudio.utils import Timer
 from yacs.config import CfgNode
 from paddlespeech.kws.exps.mdtc.collate import collate_features
 from paddlespeech.kws.models.loss import max_pooling_loss
 from paddlespeech.kws.models.mdtc import KWSModel
--- a/paddlespeech/cls/models/panns/panns.py
+++ b/paddlespeech/cls/models/panns/panns.py
@ -15,8 +15,8 @@ import os
 import paddle.nn as nn
 import paddle.nn.functional as F
 from paddleaudio.utils.download import load_state_dict_from_url
 from paddlespeech.utils.env import MODEL_HOME
 __all__ = ['CNN14', 'CNN10', 'CNN6', 'cnn14', 'cnn10', 'cnn6']
--- a/paddlespeech/kws/exps/mdtc/train.py
+++ b/paddlespeech/kws/exps/mdtc/train.py
@ -14,10 +14,10 @@
 import os
 import paddle
 from yacs.config import CfgNode
 from paddleaudio.utils import logger
 from paddleaudio.utils import Timer
 from yacs.config import CfgNode
 from paddlespeech.kws.exps.mdtc.collate import collate_features
 from paddlespeech.kws.models.loss import max_pooling_loss
 from paddlespeech.kws.models.mdtc import KWSModel
--- a/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py
+++ b/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py
@ -14,11 +14,10 @@
 """Contains the audio featurizer class."""
 import numpy as np
 import paddle
 import paddleaudio.compliance.kaldi as kaldi
 from python_speech_features import delta
 from python_speech_features import mfcc
 import paddleaudio.compliance.kaldi as kaldi
 class AudioFeaturizer():
    """Audio featurizer, for extracting features from audio contents of
--- a/paddlespeech/s2t/models/u2/u2.py
+++ b/paddlespeech/s2t/models/u2/u2.py
@ -28,10 +28,10 @@ from typing import Tuple
 import paddle
 from paddle import jit
 from paddle import nn
 from paddleaudio.utils.tensor_utils import add_sos_eos
 from paddleaudio.utils.tensor_utils import pad_sequence
 from paddleaudio.utils.tensor_utils import th_accuracy
 from paddlespeech.s2t.decoders.scorers.ctc import CTCPrefixScorer
 from paddlespeech.s2t.frontend.utility import IGNORE_ID
 from paddlespeech.s2t.frontend.utility import load_cmvn
--- a/paddlespeech/s2t/models/u2_st/u2_st.py
+++ b/paddlespeech/s2t/models/u2_st/u2_st.py
@ -24,9 +24,9 @@ from typing import Tuple
 import paddle
 from paddle import jit
 from paddle import nn
 from paddleaudio.utils.tensor_utils import add_sos_eos
 from paddleaudio.utils.tensor_utils import th_accuracy
 from paddlespeech.s2t.frontend.utility import IGNORE_ID
 from paddlespeech.s2t.frontend.utility import load_cmvn
 from paddlespeech.s2t.modules.cmvn import GlobalCMVN
--- a/paddlespeech/server/engine/vector/python/vector_engine.py
+++ b/paddlespeech/server/engine/vector/python/vector_engine.py
@ -16,9 +16,9 @@ from collections import OrderedDict
 import numpy as np
 import paddle
 from paddleaudio.backends import soundfile_load as load_audio
 from paddleaudio.compliance.librosa import melspectrogram
 from paddlespeech.cli.log import logger
 from paddlespeech.cli.vector.infer import VectorExecutor
 from paddlespeech.server.engine.base_engine import BaseEngine
--- a/paddlespeech/server/util.py
+++ b/paddlespeech/server/util.py
@ -24,11 +24,11 @@ from typing import Any
 from typing import Dict
 import paddle
 import paddleaudio
 import requests
 import yaml
 from paddle.framework import load
 import paddleaudio
 from .entry import client_commands
 from .entry import server_commands
 from paddlespeech.cli import download
--- a/paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py
+++ b/paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py
@ -16,10 +16,10 @@ import os
 import time
 import paddle
 from yacs.config import CfgNode
 from paddleaudio.backends import soundfile_load as load_audio
 from paddleaudio.compliance.librosa import melspectrogram
 from yacs.config import CfgNode
 from paddlespeech.s2t.utils.log import Log
 from paddlespeech.vector.io.batch import feature_normalize
 from paddlespeech.vector.models.ecapa_tdnn import EcapaTdnn
--- a/paddlespeech/vector/exps/ecapa_tdnn/test.py
+++ b/paddlespeech/vector/exps/ecapa_tdnn/test.py
@ -18,10 +18,10 @@ import numpy as np
 import paddle
 from paddle.io import BatchSampler
 from paddle.io import DataLoader
 from paddleaudio.metric import compute_eer
 from tqdm import tqdm
 from yacs.config import CfgNode
 from paddleaudio.metric import compute_eer
 from paddlespeech.s2t.utils.log import Log
 from paddlespeech.vector.io.batch import batch_feature_normalize
 from paddlespeech.vector.io.dataset import CSVDataset
--- a/paddlespeech/vector/exps/ecapa_tdnn/train.py
+++ b/paddlespeech/vector/exps/ecapa_tdnn/train.py
@ -20,9 +20,9 @@ import paddle
 from paddle.io import BatchSampler
 from paddle.io import DataLoader
 from paddle.io import DistributedBatchSampler
 from paddleaudio.compliance.librosa import melspectrogram
 from yacs.config import CfgNode
 from paddleaudio.compliance.librosa import melspectrogram
 from paddlespeech.s2t.utils.log import Log
 from paddlespeech.vector.io.augment import build_augment_pipeline
 from paddlespeech.vector.io.augment import waveform_augment
--- a/paddlespeech/vector/io/dataset.py
+++ b/paddlespeech/vector/io/dataset.py
@ -15,10 +15,9 @@ from dataclasses import dataclass
 from dataclasses import fields
 from paddle.io import Dataset
 from paddleaudio.backends import soundfile_load as load_audio
 from paddleaudio.compliance.librosa import melspectrogram
 from paddlespeech.s2t.utils.log import Log
 logger = Log(__name__).getlog()
--- a/paddlespeech/vector/io/dataset_from_json.py
+++ b/paddlespeech/vector/io/dataset_from_json.py
@ -16,7 +16,6 @@ from dataclasses import dataclass
 from dataclasses import fields
 from paddle.io import Dataset
 from paddleaudio.backends import soundfile_load as load_audio
 from paddleaudio.compliance.librosa import melspectrogram
 from paddleaudio.compliance.librosa import mfcc
--- a/tests/unit/audio/backends/sox_io/load_test.py
+++ b/tests/unit/audio/backends/sox_io/load_test.py
@ -1,28 +1,29 @@
 import unittest
 import itertools
 import unittest
 from parameterized import parameterized
 import numpy as np
-from paddlespeech.audio._internal import module_utils as _mod_utils
+from parameterized import parameterized
 from paddlespeech.audio.backends import sox_io_backend
-from tests.unit.common_utils import (
+from paddlespeech.audio.backends import sox_io_backend
-    get_wav_data,
+from tests.unit.common_utils import get_wav_data
-    load_wav,
+from tests.unit.common_utils import load_wav
-    save_wav,
+from tests.unit.common_utils import save_wav
 )
 #code is from:https://github.com/pytorch/audio/blob/main/torchaudio/test/torchaudio_unittest/backend/sox_io/load_test.py
 class TestLoad(unittest.TestCase):
 class TestLoad(unittest.TestCase):
    def assert_wav(self, dtype, sample_rate, num_channels, normalize, duration):
        """`sox_io_backend.load` can load wav format correctly.
        Wav data loaded with sox_io backend should match those with scipy
        """
        path = 'testdata/reference.wav'
-        data = get_wav_data(dtype, num_channels, normalize=normalize, num_frames=duration * sample_rate)
+        data = get_wav_data(
            dtype,
            num_channels,
            normalize=normalize,
            num_frames=duration * sample_rate)
        save_wav(path, data, sample_rate)
        expected = load_wav(path, normalize=normalize)[0]
        data, sr = sox_io_backend.load(path, normalize=normalize)
@ -32,16 +33,18 @@ class TestLoad(unittest.TestCase):
    @parameterized.expand(
        list(
            itertools.product(
-                 ["float64", "float32", "int32",],
+                [
                    "float64",
                    "float32",
                    "int32",
                ],
                [8000, 16000],
                [1, 2],
-                [False, True],
+                [False, True], )), )
            )
        ),
    )
    def test_wav(self, dtype, sample_rate, num_channels, normalize):
        """`sox_io_backend.load` can load wav format correctly."""
        self.assert_wav(dtype, sample_rate, num_channels, normalize, duration=1)
 if __name__ == '__main__':
    unittest.main()
--- a/tests/unit/audio/backends/sox_io/save_test.py
+++ b/tests/unit/audio/backends/sox_io/save_test.py
@ -1,23 +1,19 @@
 import io
 import os
 import unittest
 import numpy as np
 import paddle
 from parameterized import parameterized
 from paddlespeech.audio.backends import sox_io_backend
-from tests.unit.common_utils import (
+from paddlespeech.audio.backends import sox_io_backend
-    get_wav_data,
+from tests.unit.common_utils import get_wav_data
-    load_wav,
+from tests.unit.common_utils import load_wav
-    save_wav,
+from tests.unit.common_utils import nested_params
-    nested_params,
+from tests.unit.common_utils import save_wav
-    TempDirMixin,
+from tests.unit.common_utils import sox_utils
-    sox_utils
+from tests.unit.common_utils import TempDirMixin
 )
 #code is from:https://github.com/pytorch/audio/blob/main/torchaudio/test/torchaudio_unittest/backend/sox_io/save_test.py
 def _get_sox_encoding(encoding):
    encodings = {
        "PCM_F": "floating-point",
@ -28,20 +24,20 @@ def _get_sox_encoding(encoding):
    }
    return encodings.get(encoding)
 class TestSaveBase(TempDirMixin):
    def assert_save_consistency(
            self,
            format: str,
            *,
-        compression: float = None,
+            compression: float=None,
-        encoding: str = None,
+            encoding: str=None,
-        bits_per_sample: int = None,
+            bits_per_sample: int=None,
-        sample_rate: float = 8000,
+            sample_rate: float=8000,
-        num_channels: int = 2,
+            num_channels: int=2,
-        num_frames: float = 3 * 8000,
+            num_frames: float=3 * 8000,
-        src_dtype: str = "int32",
+            src_dtype: str="int32",
-        test_mode: str = "path",
+            test_mode: str="path", ):
    ):
        """`save` function produces file that is comparable with `sox` command
        To compare that the file produced by `save` function agains the file produced by
@ -89,15 +85,20 @@ class TestSaveBase(TempDirMixin):
        ref_path = self.get_temp_path("3.2.ref.wav")
        # 1. Generate original wav
-        data = get_wav_data(src_dtype, num_channels, normalize=False, num_frames=num_frames)
+        data = get_wav_data(
            src_dtype, num_channels, normalize=False, num_frames=num_frames)
        save_wav(src_path, data, sample_rate)
        # 2.1. Convert the original wav to target format with paddleaudio
        data = load_wav(src_path, normalize=False)[0]
        if test_mode == "path":
            sox_io_backend.save(
-                tgt_path, data, sample_rate, compression=compression, encoding=encoding, bits_per_sample=bits_per_sample
+                tgt_path,
-            )
+                data,
                sample_rate,
                compression=compression,
                encoding=encoding,
                bits_per_sample=bits_per_sample)
        elif test_mode == "fileobj":
            with open(tgt_path, "bw") as file_:
                sox_io_backend.save(
@ -107,8 +108,7 @@ class TestSaveBase(TempDirMixin):
                    format=format,
                    compression=compression,
                    encoding=encoding,
-                    bits_per_sample=bits_per_sample,
+                    bits_per_sample=bits_per_sample, )
                )
        elif test_mode == "bytesio":
            file_ = io.BytesIO()
            sox_io_backend.save(
@ -118,33 +118,40 @@ class TestSaveBase(TempDirMixin):
                format=format,
                compression=compression,
                encoding=encoding,
-                bits_per_sample=bits_per_sample,
+                bits_per_sample=bits_per_sample, )
            )
            file_.seek(0)
            with open(tgt_path, "bw") as f:
                f.write(file_.read())
        else:
            raise ValueError(f"Unexpected test mode: {test_mode}")
        # 2.2. Convert the target format to wav with sox
-        sox_utils.convert_audio_file(tgt_path, tst_path, encoding=cmp_encoding, bit_depth=cmp_bit_depth)
+        sox_utils.convert_audio_file(
            tgt_path, tst_path, encoding=cmp_encoding, bit_depth=cmp_bit_depth)
        # 2.3. Load with SciPy
        found = load_wav(tst_path, normalize=False)[0]
        # 3.1. Convert the original wav to target format with sox
        sox_encoding = _get_sox_encoding(encoding)
        sox_utils.convert_audio_file(
-            src_path, sox_path, compression=compression, encoding=sox_encoding, bit_depth=bits_per_sample
+            src_path,
-        )
+            sox_path,
            compression=compression,
            encoding=sox_encoding,
            bit_depth=bits_per_sample)
        # 3.2. Convert the target format to wav with sox
-        sox_utils.convert_audio_file(sox_path, ref_path, encoding=cmp_encoding, bit_depth=cmp_bit_depth)
+        sox_utils.convert_audio_file(
            sox_path, ref_path, encoding=cmp_encoding, bit_depth=cmp_bit_depth)
        # 3.3. Load with SciPy
        expected = load_wav(ref_path, normalize=False)[0]
        np.testing.assert_array_almost_equal(found, expected)
 class TestSave(TestSaveBase, unittest.TestCase):
    @nested_params(
-        ["path",],
+        [
            "path",
        ],
        [
            ("PCM_U", 8),
            ("PCM_S", 16),
@ -153,22 +160,27 @@ class TestSave(TestSaveBase, unittest.TestCase):
            ("PCM_F", 64),
            ("ULAW", 8),
            ("ALAW", 8),
-        ],
+        ], )
    )
    def test_save_wav(self, test_mode, enc_params):
        encoding, bits_per_sample = enc_params
-        self.assert_save_consistency("wav", encoding=encoding, bits_per_sample=bits_per_sample, test_mode=test_mode)
+        self.assert_save_consistency(
            "wav",
            encoding=encoding,
            bits_per_sample=bits_per_sample,
            test_mode=test_mode)
    @nested_params(
        ["path", ],
        [
-            ("float32",),
+            "path",
            ("int32",),
        ],
-    )
+        [
            ("float32", ),
            ("int32", ),
        ], )
    def test_save_wav_dtype(self, test_mode, params):
-        (dtype,) = params
+        (dtype, ) = params
-        self.assert_save_consistency("wav", src_dtype=dtype, test_mode=test_mode)
+        self.assert_save_consistency(
            "wav", src_dtype=dtype, test_mode=test_mode)
 if __name__ == '__main__':
--- a/tests/unit/audio/backends/sox_io/smoke_test.py
+++ b/tests/unit/audio/backends/sox_io/smoke_test.py
@ -3,12 +3,12 @@ import itertools
 import unittest
 from parameterized import parameterized
 from paddlespeech.audio.backends import sox_io_backend
-from tests.unit.common_utils import (
+from tests.unit.common_utils import get_wav_data
-    get_wav_data,
+from tests.unit.common_utils import name_func
-    TempDirMixin,
+from tests.unit.common_utils import TempDirMixin
-    name_func
+
 )
 class SmokeTest(TempDirMixin, unittest.TestCase):
    """Run smoke test on various audio format
@ -20,15 +20,23 @@ class SmokeTest(TempDirMixin, unittest.TestCase):
    however without such tools, the correctness of each function cannot be verified.
    """
-    def run_smoke_test(self, ext, sample_rate, num_channels, *, compression=None, dtype="float32"):
+    def run_smoke_test(self,
                       ext,
                       sample_rate,
                       num_channels,
                       *,
                       compression=None,
                       dtype="float32"):
        duration = 1
        num_frames = sample_rate * duration
        #path = self.get_temp_path(f"test.{ext}")
        path = self.get_temp_path(f"test.{ext}")
-        original = get_wav_data(dtype, num_channels, normalize=False, num_frames=num_frames)
+        original = get_wav_data(
            dtype, num_channels, normalize=False, num_frames=num_frames)
        # 1. run save
-        sox_io_backend.save(path, original, sample_rate, compression=compression)
+        sox_io_backend.save(
            path, original, sample_rate, compression=compression)
        # 2. run info
        info = sox_io_backend.info(path)
        assert info.sample_rate == sample_rate
@ -41,14 +49,11 @@ class SmokeTest(TempDirMixin, unittest.TestCase):
    @parameterized.expand(
        list(
            itertools.product(
-                ["float32", "int32" ],
+                ["float32", "int32"],
                #["float32", "int32", "int16", "uint8"],
                [8000, 16000],
-                [1, 2],
+                [1, 2], )),
-            )
+        name_func=name_func, )
        ),
        name_func=name_func,
    )
    def test_wav(self, dtype, sample_rate, num_channels):
        """Run smoke test on wav format"""
        self.run_smoke_test("wav", sample_rate, num_channels, dtype=dtype)
@ -80,18 +85,15 @@ class SmokeTest(TempDirMixin, unittest.TestCase):
    #self.run_smoke_test("vorbis", sample_rate, num_channels, compression=quality_level)
    @parameterized.expand(
-        list(
+        list(itertools.product(
            itertools.product(
            [8000, 16000],
            [1, 2],
-                list(range(9)),
+            list(range(9)), )),
-            )
+        name_func=name_func, )
        ),
        name_func=name_func,
    )
    def test_flac(self, sample_rate, num_channels, compression_level):
        """Run smoke test on flac format"""
-        self.run_smoke_test("flac", sample_rate, num_channels, compression=compression_level)
+        self.run_smoke_test(
            "flac", sample_rate, num_channels, compression=compression_level)
 class SmokeTestFileObj(unittest.TestCase):
@ -104,14 +106,22 @@ class SmokeTestFileObj(unittest.TestCase):
    however without such tools, the correctness of each function cannot be verified.
    """
-    def run_smoke_test(self, ext, sample_rate, num_channels, *, compression=None, dtype="float32"):
+    def run_smoke_test(self,
                       ext,
                       sample_rate,
                       num_channels,
                       *,
                       compression=None,
                       dtype="float32"):
        duration = 1
        num_frames = sample_rate * duration
-        original = get_wav_data(dtype, num_channels, normalize=False, num_frames=num_frames)
+        original = get_wav_data(
            dtype, num_channels, normalize=False, num_frames=num_frames)
        fileobj = io.BytesIO()
        # 1. run save
-        sox_io_backend.save(fileobj, original, sample_rate, compression=compression, format=ext)
+        sox_io_backend.save(
            fileobj, original, sample_rate, compression=compression, format=ext)
        # 2. run info
        fileobj.seek(0)
        info = sox_io_backend.info(fileobj, format=ext)
@ -124,15 +134,11 @@ class SmokeTestFileObj(unittest.TestCase):
        assert loaded.shape[0] == num_channels
    @parameterized.expand(
-        list(
+        list(itertools.product(
            itertools.product(
            ["float32", "int32"],
            [8000, 16000],
-                [1, 2],
+            [1, 2], )),
-            )
+        name_func=name_func, )
        ),
        name_func=name_func,
    )
    def test_wav(self, dtype, sample_rate, num_channels):
        """Run smoke test on wav format"""
        self.run_smoke_test("wav", sample_rate, num_channels, dtype=dtype)
@ -165,18 +171,16 @@ class SmokeTestFileObj(unittest.TestCase):
    #self.run_smoke_test("vorbis", sample_rate, num_channels, compression=quality_level)
    @parameterized.expand(
-        list(
+        list(itertools.product(
            itertools.product(
            [8000, 16000],
            [1, 2],
-                list(range(9)),
+            list(range(9)), )),
-            )
+        name_func=name_func, )
        ),
        name_func=name_func,
    )
    def test_flac(self, sample_rate, num_channels, compression_level):
        #"""Run smoke test on flac format"""
-        self.run_smoke_test("flac", sample_rate, num_channels, compression=compression_level)
+        self.run_smoke_test(
            "flac", sample_rate, num_channels, compression=compression_level)
 if __name__ == '__main__':
    #test_func()
--- a/tests/unit/audio/backends/sox_io/sox_effect_test.py
+++ b/tests/unit/audio/backends/sox_io/sox_effect_test.py
@ -4,24 +4,18 @@ import itertools
 import tarfile
 import unittest
 from pathlib import Path
 import numpy as np
 import numpy as np
 from parameterized import parameterized
 from paddlespeech.audio import sox_effects
-from paddlespeech.audio._internal import module_utils as _mod_utils
+from tests.unit.common_utils import get_sinusoid
-from tests.unit.common_utils import (
+from tests.unit.common_utils import get_wav_data
-    get_sinusoid,
+from tests.unit.common_utils import load_effects_params
-    get_wav_data,
+from tests.unit.common_utils import load_wav
-    load_wav,
+from tests.unit.common_utils import save_wav
-    save_wav,
+from tests.unit.common_utils import sox_utils
-    sox_utils,
+from tests.unit.common_utils import TempDirMixin
    TempDirMixin,
    name_func,
    load_effects_params
 )
 if _mod_utils.is_module_available("requests"):
    import requests
 class TestSoxEffects(unittest.TestCase):
@ -35,14 +29,18 @@ class TestSoxEffectsTensor(TempDirMixin, unittest.TestCase):
    """Test suite for `apply_effects_tensor` function"""
    @parameterized.expand(
-        list(itertools.product(["float32", "int32"], [8000, 16000], [1, 2, 4, 8], [True, False])),
+        list(
-    )
+            itertools.product(["float32", "int32"], [8000, 16000], [1, 2, 4, 8],
-    def test_apply_no_effect(self, dtype, sample_rate, num_channels, channels_first):
+                              [True, False])), )
    def test_apply_no_effect(self, dtype, sample_rate, num_channels,
                             channels_first):
        """`apply_effects_tensor` without effects should return identical data as input"""
-        original = get_wav_data(dtype, num_channels, channels_first=channels_first)
+        original = get_wav_data(
            dtype, num_channels, channels_first=channels_first)
        expected = original.clone()
-        found, output_sample_rate = sox_effects.apply_effects_tensor(expected, sample_rate, [], channels_first)
+        found, output_sample_rate = sox_effects.apply_effects_tensor(
            expected, sample_rate, [], channels_first)
        assert (output_sample_rate == sample_rate)
        # SoxEffect should not alter the input Tensor object
@ -69,12 +67,18 @@ class TestSoxEffectsTensor(TempDirMixin, unittest.TestCase):
        input_path = self.get_temp_path("input.wav")
        reference_path = self.get_temp_path("reference.wav")
-        original = get_sinusoid(frequency=800, sample_rate=input_sr, n_channels=num_channels, dtype="float32")
+        original = get_sinusoid(
            frequency=800,
            sample_rate=input_sr,
            n_channels=num_channels,
            dtype="float32")
        save_wav(input_path, original, input_sr)
-        sox_utils.run_sox_effect(input_path, reference_path, effects, output_sample_rate=output_sr)
+        sox_utils.run_sox_effect(
            input_path, reference_path, effects, output_sample_rate=output_sr)
        expected, expected_sr = load_wav(reference_path)
-        found, sr = sox_effects.apply_effects_tensor(original, input_sr, effects)
+        found, sr = sox_effects.apply_effects_tensor(original, input_sr,
                                                     effects)
        assert sr == expected_sr
        #self.assertEqual(expected, found)
@ -90,20 +94,19 @@ class TestSoxEffectsFile(TempDirMixin, unittest.TestCase):
                ["float32", "int32"],
                [8000, 16000],
                [1, 2, 4, 8],
-                [False, True],
+                [False, True], )),
            )
        ),
        #name_func=name_func,
    )
-    def test_apply_no_effect(self, dtype, sample_rate, num_channels, channels_first):
+    def test_apply_no_effect(self, dtype, sample_rate, num_channels,
                             channels_first):
        """`apply_effects_file` without effects should return identical data as input"""
        path = self.get_temp_path("input.wav")
-        expected = get_wav_data(dtype, num_channels, channels_first=channels_first)
+        expected = get_wav_data(
            dtype, num_channels, channels_first=channels_first)
        save_wav(path, expected, sample_rate, channels_first=channels_first)
        found, output_sample_rate = sox_effects.apply_effects_file(
-            path, [], normalize=False, channels_first=channels_first
+            path, [], normalize=False, channels_first=channels_first)
        )
        assert output_sample_rate == sample_rate
        #self.assertEqual(expected, found)
@ -126,16 +129,17 @@ class TestSoxEffectsFile(TempDirMixin, unittest.TestCase):
        reference_path = self.get_temp_path("reference.wav")
        data = get_wav_data(dtype, num_channels, channels_first=channels_first)
        save_wav(input_path, data, input_sr, channels_first=channels_first)
-        sox_utils.run_sox_effect(input_path, reference_path, effects, output_sample_rate=output_sr)
+        sox_utils.run_sox_effect(
            input_path, reference_path, effects, output_sample_rate=output_sr)
        expected, expected_sr = load_wav(reference_path)
-        found, sr = sox_effects.apply_effects_file(input_path, effects, normalize=False, channels_first=channels_first)
+        found, sr = sox_effects.apply_effects_file(
            input_path, effects, normalize=False, channels_first=channels_first)
        assert sr == expected_sr
        #self.assertEqual(found, expected)
        np.testing.assert_array_almost_equal(expected.numpy(), found.numpy())
    def test_apply_effects_path(self):
        """`apply_effects_file` should return identical data as sox command when file path is given as a Path Object"""
        dtype = "int32"
@ -149,12 +153,15 @@ class TestSoxEffectsFile(TempDirMixin, unittest.TestCase):
        reference_path = self.get_temp_path("reference.wav")
        data = get_wav_data(dtype, num_channels, channels_first=channels_first)
        save_wav(input_path, data, input_sr, channels_first=channels_first)
-        sox_utils.run_sox_effect(input_path, reference_path, effects, output_sample_rate=output_sr)
+        sox_utils.run_sox_effect(
            input_path, reference_path, effects, output_sample_rate=output_sr)
        expected, expected_sr = load_wav(reference_path)
        found, sr = sox_effects.apply_effects_file(
-            Path(input_path), effects, normalize=False, channels_first=channels_first
+            Path(input_path),
-        )
+            effects,
            normalize=False,
            channels_first=channels_first)
        assert sr == expected_sr
        #self.assertEqual(found, expected)
@ -165,13 +172,10 @@ class TestFileFormats(TempDirMixin, unittest.TestCase):
    """`apply_effects_file` gives the same result as sox on various file formats"""
    @parameterized.expand(
-        list(
+        list(itertools.product(
            itertools.product(
            ["float32", "int32"],
            [8000, 16000],
-                [1, 2],
+            [1, 2], )),
            )
        ),
        #name_func=lambda f, _, p: f'{f.__name__}_{"_".join(str(arg) for arg in p.args)}',
    )
    def test_wav(self, dtype, sample_rate, num_channels):
@ -186,7 +190,8 @@ class TestFileFormats(TempDirMixin, unittest.TestCase):
        sox_utils.run_sox_effect(input_path, reference_path, effects)
        expected, expected_sr = load_wav(reference_path)
-        found, sr = sox_effects.apply_effects_file(input_path, effects, normalize=False, channels_first=channels_first)
+        found, sr = sox_effects.apply_effects_file(
            input_path, effects, normalize=False, channels_first=channels_first)
        assert sr == expected_sr
        #self.assertEqual(found, expected)
@ -248,14 +253,12 @@ class TestFileFormats(TempDirMixin, unittest.TestCase):
    #np.testing.assert_array_almost_equal(found.numpy(), expected.numpy())
-#@skipIfNoExec("sox")
+    #@skipIfNoExec("sox")
-#@skipIfNoSox
+    #@skipIfNoSox
 class TestFileObject(TempDirMixin, unittest.TestCase):
-    @parameterized.expand(
+    @parameterized.expand([
        [
        ("wav", None),
-        ]
+    ])
    )
    def test_fileobj(self, ext, compression):
        """Applying effects via file object works"""
        sample_rate = 16000
@ -268,21 +271,25 @@ class TestFileObject(TempDirMixin, unittest.TestCase):
        data = get_wav_data("int32", 2, channels_first=channels_first)
        save_wav(input_path, data, sample_rate, channels_first=channels_first)
-        sox_utils.run_sox_effect(input_path, reference_path, effects, output_bitdepth=32)
+        sox_utils.run_sox_effect(
            input_path, reference_path, effects, output_bitdepth=32)
        expected, expected_sr = load_wav(reference_path)
        with open(input_path, "rb") as fileobj:
-            found, sr = sox_effects.apply_effects_file(fileobj, effects, channels_first=channels_first)
+            found, sr = sox_effects.apply_effects_file(
-        save_wav(self.get_temp_path("result.wav"), found, sr, channels_first=channels_first)
+                fileobj, effects, channels_first=channels_first)
        save_wav(
            self.get_temp_path("result.wav"),
            found,
            sr,
            channels_first=channels_first)
        assert sr == expected_sr
        #self.assertEqual(found, expected)
        np.testing.assert_array_almost_equal(found.numpy(), expected.numpy())
-    @parameterized.expand(
+    @parameterized.expand([
        [
        ("wav", None),
-        ]
+    ])
    )
    def test_bytesio(self, ext, compression):
        """Applying effects via BytesIO object works"""
        sample_rate = 16000
@ -294,13 +301,19 @@ class TestFileObject(TempDirMixin, unittest.TestCase):
        #sox_utils.gen_audio_file(input_path, sample_rate, num_channels=2, compression=compression)
        data = get_wav_data("int32", 2, channels_first=channels_first)
        save_wav(input_path, data, sample_rate, channels_first=channels_first)
-        sox_utils.run_sox_effect(input_path, reference_path, effects, output_bitdepth=32)
+        sox_utils.run_sox_effect(
            input_path, reference_path, effects, output_bitdepth=32)
        expected, expected_sr = load_wav(reference_path)
        with open(input_path, "rb") as file_:
            fileobj = io.BytesIO(file_.read())
-        found, sr = sox_effects.apply_effects_file(fileobj, effects, channels_first=channels_first)
+        found, sr = sox_effects.apply_effects_file(
-        save_wav(self.get_temp_path("result.wav"), found, sr, channels_first=channels_first)
+            fileobj, effects, channels_first=channels_first)
        save_wav(
            self.get_temp_path("result.wav"),
            found,
            sr,
            channels_first=channels_first)
        assert sr == expected_sr
        #self.assertEqual(found, expected)
        print("found")
@ -309,11 +322,9 @@ class TestFileObject(TempDirMixin, unittest.TestCase):
        print(expected)
        np.testing.assert_array_almost_equal(found.numpy(), expected.numpy())
-    @parameterized.expand(
+    @parameterized.expand([
        [
        ("wav", None),
-        ]
+    ])
    )
    def test_tarfile(self, ext, compression):
        """Applying effects to compressed audio via file-like file works"""
        sample_rate = 16000
@ -328,7 +339,8 @@ class TestFileObject(TempDirMixin, unittest.TestCase):
        save_wav(input_path, data, sample_rate, channels_first=channels_first)
        #       sox_utils.gen_audio_file(input_path, sample_rate, num_channels=2, compression=compression)
-        sox_utils.run_sox_effect(input_path, reference_path, effects, output_bitdepth=32)
+        sox_utils.run_sox_effect(
            input_path, reference_path, effects, output_bitdepth=32)
        expected, expected_sr = load_wav(reference_path)
@ -336,8 +348,13 @@ class TestFileObject(TempDirMixin, unittest.TestCase):
            tarobj.add(input_path, arcname=audio_file)
        with tarfile.TarFile(archive_path, "r") as tarobj:
            fileobj = tarobj.extractfile(audio_file)
-            found, sr = sox_effects.apply_effects_file(fileobj, effects, channels_first=channels_first)
+            found, sr = sox_effects.apply_effects_file(
-        save_wav(self.get_temp_path("result.wav"), found, sr, channels_first=channels_first)
+                fileobj, effects, channels_first=channels_first)
        save_wav(
            self.get_temp_path("result.wav"),
            found,
            sr,
            channels_first=channels_first)
        assert sr == expected_sr
        #self.assertEqual(found, expected)
        np.testing.assert_array_almost_equal(found.numpy(), expected.numpy())
--- a/tests/unit/audio/features/base.py
+++ b/tests/unit/audio/features/base.py
@ -17,7 +17,6 @@ import urllib.request
 import numpy as np
 import paddle
 from paddleaudio.backends import soundfile_load as load
 wav_url = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav'
--- a/tests/unit/audio/features/test_istft.py
+++ b/tests/unit/audio/features/test_istft.py
@ -15,9 +15,9 @@ import unittest
 import numpy as np
 import paddle
 from paddleaudio.functional.window import get_window
 from .base import FeatTest
 from paddleaudio.functional.window import get_window
 from paddlespeech.s2t.transform.spectrogram import IStft
 from paddlespeech.s2t.transform.spectrogram import Stft
--- a/tests/unit/audio/features/test_kaldi_feat.py
+++ b/tests/unit/audio/features/test_kaldi_feat.py
@ -14,18 +14,17 @@
 import unittest
 import numpy as np
-import paddle
+from kaldiio import ReadHelper
 from paddlespeech.audio.kaldi import fbank as fbank
 from paddlespeech.audio.kaldi import pitch as pitch
 from kaldiio import ReadHelper
 # the groundtruth feats computed in kaldi command below.
 #compute-fbank-feats  --dither=0 scp:$wav_scp ark,t:fbank_feat.ark
 #compute-kaldi-pitch-feats --sample-frequency=16000 scp:$wav_scp ark,t:pitch_feat.ark
 class TestKaldiFbank(unittest.TestCase):
 class TestKaldiFbank(unittest.TestCase):
    def test_fbank(self):
        fbank_groundtruth = {}
        with ReadHelper('ark:testdata/fbank_feat.ark') as reader:
@ -53,6 +52,5 @@ class TestKaldiFbank(unittest.TestCase):
                    pitch_feat, pitch_check, decimal=4)
 if __name__ == '__main__':
    unittest.main()
--- a/tests/unit/audio/features/test_log_melspectrogram.py
+++ b/tests/unit/audio/features/test_log_melspectrogram.py
@ -15,8 +15,8 @@ import unittest
 import numpy as np
 import paddle
 import paddleaudio
 from .base import FeatTest
 from paddlespeech.s2t.transform.spectrogram import LogMelSpectrogram
--- a/tests/unit/audio/features/test_spectrogram.py
+++ b/tests/unit/audio/features/test_spectrogram.py
@ -15,8 +15,8 @@ import unittest
 import numpy as np
 import paddle
 import paddleaudio
 from .base import FeatTest
 from paddlespeech.s2t.transform.spectrogram import Spectrogram
--- a/tests/unit/audio/features/test_stft.py
+++ b/tests/unit/audio/features/test_stft.py
@ -15,9 +15,9 @@ import unittest
 import numpy as np
 import paddle
 from paddleaudio.functional.window import get_window
 from .base import FeatTest
 from paddleaudio.functional.window import get_window
 from paddlespeech.s2t.transform.spectrogram import Stft
--- a/tests/unit/common_utils/init.py
+++ b/tests/unit/common_utils/init.py
@ -1,19 +1,15 @@
-from .wav_utils import get_wav_data, load_wav, save_wav, normalize_wav
+from .case_utils import name_func
 from .case_utils import TempDirMixin
 from .data_utils import get_sinusoid
 from .data_utils import load_effects_params
 from .data_utils import load_params
 from .parameterized_utils import nested_params
-from .data_utils import get_sinusoid, load_params, load_effects_params
+from .wav_utils import get_wav_data
-from .case_utils import (
+from .wav_utils import load_wav
-    TempDirMixin,
+from .wav_utils import normalize_wav
-    name_func
+from .wav_utils import save_wav
 )
 __all__ = [
-    "get_wav_data",
+    "get_wav_data", "load_wav", "save_wav", "normalize_wav", "load_params",
-    "load_wav",
+    "nested_params", "get_sinusoid", "name_func", "load_effects_params"
    "save_wav",
    "normalize_wav",
    "load_params",
    "nested_params",
    "get_sinusoid",
    "name_func",
    "load_effects_params"
 ]
--- a/tests/unit/common_utils/case_utils.py
+++ b/tests/unit/common_utils/case_utils.py
@ -1,24 +1,13 @@
 import functools
 import os.path
 import shutil
 import subprocess
 import sys
 import tempfile
 import time
 import unittest
 #code is from:https://github.com/pytorch/audio/blob/main/test/torchaudio_unittest/common_utils/case_utils.py
 import paddle
 from paddlespeech.audio._internal.module_utils import (
    is_kaldi_available,
    is_module_available,
    is_sox_available,
 )
 def name_func(func, _, params):
    return f'{func.__name__}_{"_".join(str(arg) for arg in params.args)}'
 class TempDirMixin:
    """Mixin to provide easy access to temp dir"""
--- a/tests/unit/common_utils/wav_utils.py
+++ b/tests/unit/common_utils/wav_utils.py
@ -1,8 +1,8 @@
 from typing import Optional
 import scipy.io.wavfile
 import paddle
-import numpy as np
+import scipy.io.wavfile
 def normalize_wav(tensor: paddle.Tensor) -> paddle.Tensor:
    if tensor.dtype == paddle.float32:
@ -26,10 +26,9 @@ def get_wav_data(
        dtype: str,
        num_channels: int,
        *,
-    num_frames: Optional[int] = None,
+        num_frames: Optional[int]=None,
-    normalize: bool = True,
+        normalize: bool=True,
-    channels_first: bool = True,
+        channels_first: bool=True, ):
 ):
    """Generate linear signal of the given dtype and num_channels
    Data range is
@ -66,7 +65,8 @@ def get_wav_data(
    elif dtype == "float64":
        base = paddle.linspace(-1.0, 1.0, num_frames, dtype=dtype_)
    elif dtype == "int32":
-        base = paddle.linspace(-2147483648, 2147483647, num_frames, dtype=dtype_)
+        base = paddle.linspace(
            -2147483648, 2147483647, num_frames, dtype=dtype_)
    #elif dtype == "int16":
    #    base = paddle.linspace(-32768, 32767, num_frames, dtype=dtype_)
    #dtype_np = getattr(np, dtype)