From 4051e7b7623cc4b782efbd20e0a6e1d9f0162011 Mon Sep 17 00:00:00 2001
From: Hui Zhang <zhtclz@foxmail.com>
Date: Wed, 23 Mar 2022 11:07:40 +0000
Subject: [PATCH] Fix compliance test bug and apply code formatting

---
 .gitignore                                    |  4 ++-
 demos/audio_searching/requirements.txt        | 13 +++++----
 demos/audio_searching/src/config.py           |  1 -
 demos/audio_searching/src/logs.py             |  3 +--
 demos/audio_searching/src/operations/load.py  |  5 ++--
 examples/ami/sd0/local/ami_prepare.py         |  1 -
 paddleaudio/.gitignore                        |  2 ++
 paddleaudio/docs/README.md                    |  2 +-
 paddleaudio/docs/source/conf.py               | 27 +++++--------------
 .../paddleaudio/compliance/__init__.py        |  2 ++
 paddleaudio/paddleaudio/metric/mcd.py         |  4 ++-
 .../frontend/zh_normalization/chronology.py   |  4 +--
 paddlespeech/vector/cluster/diarization.py    | 12 ++++-----
 13 files changed, 33 insertions(+), 47 deletions(-)
 create mode 100644 paddleaudio/.gitignore

diff --git a/.gitignore b/.gitignore
index ad8e7492..e25ec327 100644
--- a/.gitignore
+++ b/.gitignore
@@ -34,4 +34,6 @@ tools/activate_python.sh
 tools/miniconda.sh
 tools/CRF++-0.58/
 
-speechx/fc_patch/
\ No newline at end of file
+speechx/fc_patch/
+
+third_party/ctc_decoders/paddlespeech_ctcdecoders.py
diff --git a/demos/audio_searching/requirements.txt b/demos/audio_searching/requirements.txt
index 9e73361b..6eb3fd80 100644
--- a/demos/audio_searching/requirements.txt
+++ b/demos/audio_searching/requirements.txt
@@ -1,12 +1,12 @@
-soundfile==0.10.3.post1
+diskcache==5.2.1
+fastapi
 librosa==0.8.0
 numpy
+pydantic
+pymilvus==2.0.1
 pymysql
-fastapi
-uvicorn
-diskcache==5.2.1
-pymilvus==2.0.1
 python-multipart
-typing
+soundfile==0.10.3.post1
 starlette
-pydantic
\ No newline at end of file
+typing
+uvicorn
diff --git a/demos/audio_searching/src/config.py b/demos/audio_searching/src/config.py
index 72a8fb4b..70ac494c 100644
--- a/demos/audio_searching/src/config.py
+++ b/demos/audio_searching/src/config.py
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import os
 
 ############### Milvus Configuration ###############
diff --git a/demos/audio_searching/src/logs.py b/demos/audio_searching/src/logs.py
index ba3ed069..465eb682 100644
--- a/demos/audio_searching/src/logs.py
+++ b/demos/audio_searching/src/logs.py
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import codecs
 import datetime
 import logging
 import os
@@ -124,7 +123,7 @@ class MultiprocessHandler(logging.FileHandler):
             logging.FileHandler.emit(self, record)
         except (KeyboardInterrupt, SystemExit):
             raise
-        except:
+        except Exception as e:
             self.handleError(record)
 
 
diff --git a/demos/audio_searching/src/operations/load.py b/demos/audio_searching/src/operations/load.py
index 7a295bf3..80b6375f 100644
--- a/demos/audio_searching/src/operations/load.py
+++ b/demos/audio_searching/src/operations/load.py
@@ -26,9 +26,8 @@ def get_audios(path):
     """
     supported_formats = [".wav", ".mp3", ".ogg", ".flac", ".m4a"]
     return [
-        item
-        for sublist in [[os.path.join(dir, file) for file in files]
-                        for dir, _, files in list(os.walk(path))]
+        item for sublist in [[os.path.join(dir, file) for file in files]
+                             for dir, _, files in list(os.walk(path))]
         for item in sublist if os.path.splitext(item)[1] in supported_formats
     ]
 
diff --git a/examples/ami/sd0/local/ami_prepare.py b/examples/ami/sd0/local/ami_prepare.py
index 01582dbd..569c3a60 100644
--- a/examples/ami/sd0/local/ami_prepare.py
+++ b/examples/ami/sd0/local/ami_prepare.py
@@ -18,7 +18,6 @@ Download: http://groups.inf.ed.ac.uk/ami/download/
 
 Prepares metadata files (JSON) from manual annotations "segments/" using RTTM format (Oracle VAD).
 """
-
 import argparse
 import glob
 import json
diff --git a/paddleaudio/.gitignore b/paddleaudio/.gitignore
new file mode 100644
index 00000000..1c930053
--- /dev/null
+++ b/paddleaudio/.gitignore
@@ -0,0 +1,2 @@
+.eggs
+*.wav
diff --git a/paddleaudio/docs/README.md b/paddleaudio/docs/README.md
index d53f0be7..8e4fccc5 100644
--- a/paddleaudio/docs/README.md
+++ b/paddleaudio/docs/README.md
@@ -15,4 +15,4 @@ Exclude `paddleaudio.utils`
 
 ## 3. Build
 
-`sphinx-build source _html`
\ No newline at end of file
+`sphinx-build source _html`
diff --git a/paddleaudio/docs/source/conf.py b/paddleaudio/docs/source/conf.py
index 4efe85b0..09c4f312 100644
--- a/paddleaudio/docs/source/conf.py
+++ b/paddleaudio/docs/source/conf.py
@@ -5,18 +5,14 @@
 # This file does only contain a selection of the most common options. For a
 # full list see the documentation:
 # http://www.sphinx-doc.org/en/master/config
-
 # -- Path setup --------------------------------------------------------------
-
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
-
 import os
 import sys
 sys.path.insert(0, os.path.abspath('../..'))
 
-
 # -- Project information -----------------------------------------------------
 
 project = 'PaddleAudio'
@@ -28,7 +24,6 @@ version = ''
 # The full version, including alpha/beta/rc tags
 release = '0.2.0'
 
-
 # -- General configuration ---------------------------------------------------
 
 # If your documentation needs a minimal Sphinx version, state it here.
@@ -75,7 +70,6 @@ exclude_patterns = []
 # The name of the Pygments (syntax highlighting) style to use.
 pygments_style = None
 
-
 # -- Options for HTML output -------------------------------------------------
 
 # The theme to use for HTML and HTML Help pages.  See the documentation for
@@ -112,13 +106,11 @@ html_css_files = [
 #
 # html_sidebars = {}
 
-
 # -- Options for HTMLHelp output ---------------------------------------------
 
 # Output file base name for HTML help builder.
 htmlhelp_basename = 'PaddleAudiodoc'
 
-
 # -- Options for LaTeX output ------------------------------------------------
 
 latex_elements = {
@@ -143,20 +135,16 @@ latex_elements = {
 # (source start file, target name, title,
 #  author, documentclass [howto, manual, or own class]).
 latex_documents = [
-    (master_doc, 'PaddleAudio.tex', 'PaddleAudio Documentation',
-     'PaddlePaddle', 'manual'),
+    (master_doc, 'PaddleAudio.tex', 'PaddleAudio Documentation', 'PaddlePaddle',
+     'manual'),
 ]
 
-
 # -- Options for manual page output ------------------------------------------
 
 # One entry per manual page. List of tuples
 # (source start file, name, description, authors, manual section).
-man_pages = [
-    (master_doc, 'paddleaudio', 'PaddleAudio Documentation',
-     [author], 1)
-]
-
+man_pages = [(master_doc, 'paddleaudio', 'PaddleAudio Documentation', [author],
+              1)]
 
 # -- Options for Texinfo output ----------------------------------------------
 
@@ -164,12 +152,10 @@ man_pages = [
 # (source start file, target name, title, author,
 #  dir menu entry, description, category)
 texinfo_documents = [
-    (master_doc, 'PaddleAudio', 'PaddleAudio Documentation',
-     author, 'PaddleAudio', 'One line description of project.',
-     'Miscellaneous'),
+    (master_doc, 'PaddleAudio', 'PaddleAudio Documentation', author,
+     'PaddleAudio', 'One line description of project.', 'Miscellaneous'),
 ]
 
-
 # -- Options for Epub output -------------------------------------------------
 
 # Bibliographic Dublin Core info.
@@ -187,7 +173,6 @@ epub_title = project
 # A list of files that should not be packed into the epub file.
 epub_exclude_files = ['search.html']
 
-
 # -- Extension configuration -------------------------------------------------
 
 # -- Options for intersphinx extension ---------------------------------------
diff --git a/paddleaudio/paddleaudio/compliance/__init__.py b/paddleaudio/paddleaudio/compliance/__init__.py
index 97043fd7..c08f9ab1 100644
--- a/paddleaudio/paddleaudio/compliance/__init__.py
+++ b/paddleaudio/paddleaudio/compliance/__init__.py
@@ -11,3 +11,5 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from . import kaldi
+from . import librosa
diff --git a/paddleaudio/paddleaudio/metric/mcd.py b/paddleaudio/paddleaudio/metric/mcd.py
index d1852b4b..63a25fc2 100644
--- a/paddleaudio/paddleaudio/metric/mcd.py
+++ b/paddleaudio/paddleaudio/metric/mcd.py
@@ -22,7 +22,9 @@ __all__ = [
 ]
 
 
-def mcd_distance(xs: np.ndarray, ys: np.ndarray, cost_fn: Callable=mt.logSpecDbDist) -> float:
+def mcd_distance(xs: np.ndarray,
+                 ys: np.ndarray,
+                 cost_fn: Callable=mt.logSpecDbDist) -> float:
     """Mel cepstral distortion (MCD), dtw distance.
 
     Dynamic Time Warping.
diff --git a/paddlespeech/t2s/frontend/zh_normalization/chronology.py b/paddlespeech/t2s/frontend/zh_normalization/chronology.py
index ea518913..ea4558e2 100644
--- a/paddlespeech/t2s/frontend/zh_normalization/chronology.py
+++ b/paddlespeech/t2s/frontend/zh_normalization/chronology.py
@@ -64,7 +64,7 @@ def replace_time(match) -> str:
     result = f"{num2str(hour)}点"
     if minute.lstrip('0'):
         if int(minute) == 30:
-            result += f"半"
+            result += "半"
         else:
             result += f"{_time_num2str(minute)}分"
     if second and second.lstrip('0'):
@@ -75,7 +75,7 @@ def replace_time(match) -> str:
         result += f"{num2str(hour_2)}点"
         if minute_2.lstrip('0'):
             if int(minute) == 30:
-                result += f"半"
+                result += "半"
             else:
                 result += f"{_time_num2str(minute_2)}分"
         if second_2 and second_2.lstrip('0'):
diff --git a/paddlespeech/vector/cluster/diarization.py b/paddlespeech/vector/cluster/diarization.py
index 6432acb8..99ac41cd 100644
--- a/paddlespeech/vector/cluster/diarization.py
+++ b/paddlespeech/vector/cluster/diarization.py
@@ -16,22 +16,20 @@ This script contains basic functions used for speaker diarization.
 This script has an optional dependency on open source sklearn library.
 A few sklearn functions are modified in this script as per requirement.
 """
-
 import argparse
 import warnings
-import scipy
-import numpy as np
 from distutils.util import strtobool
 
+import numpy as np
+import scipy
+import sklearn
 from scipy import sparse
-from scipy.sparse.linalg import eigsh
 from scipy.sparse.csgraph import connected_components
 from scipy.sparse.csgraph import laplacian as csgraph_laplacian
-
-import sklearn
-from sklearn.neighbors import kneighbors_graph
+from scipy.sparse.linalg import eigsh
 from sklearn.cluster import SpectralClustering
 from sklearn.cluster._kmeans import k_means
+from sklearn.neighbors import kneighbors_graph
 
 
 def _graph_connected_component(graph, node_id):