From 0730368e5d47660cce66983affb9d8b2a245120b Mon Sep 17 00:00:00 2001
From: Hui Zhang
Date: Fri, 29 Oct 2021 08:52:40 +0000
Subject: [PATCH 1/2] install soundfile lib
---
tools/Makefile | 2 +-
tools/extras/README.md | 4 ++++
tools/extras/install_soundfile.sh | 18 ++++++++++++++++++
3 files changed, 23 insertions(+), 1 deletion(-)
create mode 100755 tools/extras/install_soundfile.sh
diff --git a/tools/Makefile b/tools/Makefile
index 87107a53..e2aba8fe 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -24,7 +24,7 @@ clean:
apt.done:
apt update -y
- apt install -y bc flac jq vim tig tree pkg-config libflac-dev libogg-dev libvorbis-dev libboost-dev swig python3-dev
+ apt install -y bc flac jq vim tig tree pkg-config libsndfile1 libflac-dev libogg-dev libvorbis-dev libboost-dev swig python3-dev
echo "check_certificate = off" >> ~/.wgetrc
touch apt.done
diff --git a/tools/extras/README.md b/tools/extras/README.md
index 19c06a13..7d03c4be 100644
--- a/tools/extras/README.md
+++ b/tools/extras/README.md
@@ -1,3 +1,7 @@
+# install scripts
+Call these scripts from the `tools` directory.
+
+## Details
1. kaldi
deps gcc, mkl or openblas
diff --git a/tools/extras/install_soundfile.sh b/tools/extras/install_soundfile.sh
new file mode 100755
index 00000000..cbc4e00d
--- /dev/null
+++ b/tools/extras/install_soundfile.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+# install package libsndfile
+
+WGET="wget --no-check-certificate"
+
+SOUNDFILE=libsndfile-1.0.28
+SOUNDFILE_LIB=${SOUNDFILE}.tar.gz
+
+echo "Install package libsndfile into default system path."
+test -e ${SOUNDFILE_LIB} || ${WGET} -c "http://www.mega-nerd.com/libsndfile/files/${SOUNDFILE_LIB}"
+if [ $? != 0 ]; then
+ echo "Download ${SOUNDFILE_LIB} failed !!!"
+ exit 1
+fi
+
+tar -zxvf ${SOUNDFILE_LIB}
+pushd ${SOUNDFILE}
+./configure > /dev/null && make > /dev/null && make install > /dev/null
+popd
\ No newline at end of file
From 9a45a75ae5ea70c2b55a962f5e4812521910014d Mon Sep 17 00:00:00 2001
From: Hui Zhang
Date: Fri, 29 Oct 2021 08:52:49 +0000
Subject: [PATCH 2/2] format code
---
README.md | 18 +++++++++---------
deepspeech/exps/deepspeech2/model.py | 1 -
parakeet/data/batch.py | 4 ++--
parakeet/exps/tacotron2/ljspeech.py | 9 +++------
4 files changed, 14 insertions(+), 18 deletions(-)
diff --git a/README.md b/README.md
index 7060a655..8a83ac61 100644
--- a/README.md
+++ b/README.md
@@ -9,20 +9,20 @@ English | [简体中文](README_ch.md)
-
+
------------------------------------------------------------------------------------
![License](https://img.shields.io/badge/license-Apache%202-red.svg)
![python version](https://img.shields.io/badge/python-3.7+-orange.svg)
![support os](https://img.shields.io/badge/os-linux-yellow.svg)
@@ -31,7 +31,7 @@ how they can use it
Via the easy-to-use, efficient, flexible and scalable implementation, our vision is to empower both industrial application and academic research, including training, inference & testing modules, and deployment process. To be more specific, this toolkit features at:
- **Fast and Light-weight**: we provide high-speed and ultra-lightweight models that are convenient for industrial deployment.
- **Rule-based Chinese frontend**: our frontend contains Text Normalization (TN) and Grapheme-to-Phoneme (G2P, including Polyphone and Tone Sandhi). Moreover, we use self-defined linguistic rules to adapt Chinese context.
-- **Varieties of Functions that Vitalize both Industrial and Academia**:
+- **Varieties of Functions that Vitalize both Industrial and Academia**:
- *Implementation of critical audio tasks*: this toolkit contains audio functions like Speech Translation (ST), Automatic Speech Recognition (ASR), Text-To-Speech Synthesis (TTS), Voice Cloning(VC), Punctuation Restoration, etc.
- *Integration of mainstream models and datasets*: the toolkit implements modules that participate in the whole pipeline of the speech tasks, and uses mainstream datasets like LibriSpeech, LJSpeech, AIShell, CSMSC, etc. See also [model lists](#models-list) for more details.
- *Cross-domain application*: as an extension of the application of traditional audio tasks, we combine the aforementioned tasks with other fields like NLP.
@@ -70,7 +70,7 @@ If you want to set up PaddleSpeech in other environment, please see the [ASR ins
## Quick Start
> Note: the current links to `English ASR` and `English TTS` are not valid.
-Just a quick test of our functions: [English ASR](link/hubdetail?name=deepspeech2_aishell&en_category=AutomaticSpeechRecognition) and [English TTS](link/hubdetail?name=fastspeech2_baker&en_category=TextToSpeech) by typing message or upload your own audio file.
+Just a quick test of our functions: [English ASR](link/hubdetail?name=deepspeech2_aishell&en_category=AutomaticSpeechRecognition) and [English TTS](link/hubdetail?name=fastspeech2_baker&en_category=TextToSpeech) by typing message or upload your own audio file.
Developers can have a try of our model with only a few lines of code.
@@ -87,7 +87,7 @@ bash local/test.sh conf/deepspeech2.yaml ckptfile offline
```
For *TTS*, try FastSpeech2 on LJSpeech:
-- Download LJSpeech-1.1 from the [ljspeech official website](https://keithito.com/LJ-Speech-Dataset/), our prepared durations for fastspeech2 [ljspeech_alignment](https://paddlespeech.bj.bcebos.com/MFA/LJSpeech-1.1/ljspeech_alignment.tar.gz).
+- Download LJSpeech-1.1 from the [ljspeech official website](https://keithito.com/LJ-Speech-Dataset/), our prepared durations for fastspeech2 [ljspeech_alignment](https://paddlespeech.bj.bcebos.com/MFA/LJSpeech-1.1/ljspeech_alignment.tar.gz).
- The pretrained models are seperated into two parts: [fastspeech2_nosil_ljspeech_ckpt](https://paddlespeech.bj.bcebos.com/Parakeet/fastspeech2_nosil_ljspeech_ckpt_0.5.zip) and [pwg_ljspeech_ckpt](https://paddlespeech.bj.bcebos.com/Parakeet/pwg_ljspeech_ckpt_0.5.zip). Please download then unzip to `./model/fastspeech2` and `./model/pwg` respectively.
- Assume your path to the dataset is `~/datasets/LJSpeech-1.1` and `./ljspeech_alignment` accordingly, preprocess your data and then use our pretrained model to synthesize:
```shell
@@ -106,7 +106,7 @@ PaddleSpeech supports a series of most popular models, summarized in [released m
ASR module contains *Acoustic Model* and *Language Model*, with the following details:
> Note: The `Link` should be code path rather than download links.
diff --git a/deepspeech/exps/deepspeech2/model.py b/deepspeech/exps/deepspeech2/model.py
index 6424cfdf..5c010f56 100644
--- a/deepspeech/exps/deepspeech2/model.py
+++ b/deepspeech/exps/deepspeech2/model.py
@@ -189,7 +189,6 @@ class DeepSpeech2Trainer(Trainer):
self.lr_scheduler = lr_scheduler
logger.info("Setup optimizer/lr_scheduler!")
-
def setup_dataloader(self):
config = self.config.clone()
config.defrost()
diff --git a/parakeet/data/batch.py b/parakeet/data/batch.py
index 515074d1..5e7ac399 100644
--- a/parakeet/data/batch.py
+++ b/parakeet/data/batch.py
@@ -53,8 +53,8 @@ def batch_text_id(minibatch, pad_id=0, dtype=np.int64):
peek_example = minibatch[0]
assert len(peek_example.shape) == 1, "text example is an 1D tensor"
- lengths = [example.shape[0] for example in minibatch
- ] # assume (channel, n_samples) or (n_samples, )
+ lengths = [example.shape[0] for example in
+ minibatch] # assume (channel, n_samples) or (n_samples, )
max_len = np.max(lengths)
batch = []
diff --git a/parakeet/exps/tacotron2/ljspeech.py b/parakeet/exps/tacotron2/ljspeech.py
index 20dc29d3..59c855eb 100644
--- a/parakeet/exps/tacotron2/ljspeech.py
+++ b/parakeet/exps/tacotron2/ljspeech.py
@@ -67,19 +67,16 @@ class LJSpeechCollector(object):
# Sort by text_len in descending order
texts = [
- i
- for i, _ in sorted(
+ i for i, _ in sorted(
zip(texts, text_lens), key=lambda x: x[1], reverse=True)
]
mels = [
- i
- for i, _ in sorted(
+ i for i, _ in sorted(
zip(mels, text_lens), key=lambda x: x[1], reverse=True)
]
mel_lens = [
- i
- for i, _ in sorted(
+ i for i, _ in sorted(
zip(mel_lens, text_lens), key=lambda x: x[1], reverse=True)
]