format

4 years ago · 8522b82999
parent 5d5266abff
commit 8522b82999
12 changed files with 62 additions and 30 deletions
--- a/demos/streaming_asr_server/README.md
+++ b/demos/streaming_asr_server/README.md
@@ -630,4 +630,4 @@ bash server.sh
  [2022-05-02 18:29:26,566] [    INFO] - asr websocket client finished : 我认为跑步最重要的就是给我带来了身体健康。
  ```

-  
+  
--- a/demos/streaming_asr_server/README_cn.md
+++ b/demos/streaming_asr_server/README_cn.md
@@ -638,4 +638,4 @@ bash server.sh
  [2022-05-02 18:29:26,566] [    INFO] - asr websocket client finished : 我认为跑步最重要的就是给我带来了身体健康。
  ```

-  
+  
--- a/paddlespeech/cli/vector/infer.py
+++ b/paddlespeech/cli/vector/infer.py
@@ -437,7 +437,9 @@ class VectorExecutor(BaseExecutor):
        if self.sample_rate != 16000 and self.sample_rate != 8000:
            logger.error(
                "invalid sample rate, please input --sr 8000 or --sr 16000")
-            logger.error(f"The model sample rate: {self.sample_rate}, the external sample rate is: {sample_rate}")
+            logger.error(
+                f"The model sample rate: {self.sample_rate}, the external sample rate is: {sample_rate}"
+            )
            return False

        if isinstance(audio_file, (str, os.PathLike)):
--- a/paddlespeech/server/README_cn.md
+++ b/paddlespeech/server/README_cn.md
@@ -82,4 +82,4 @@ paddlespeech_client vector --task spk  --server_ip 127.0.0.1 --port 8090 --input

 ```
 paddlespeech_client vector --task score  --server_ip 127.0.0.1 --port 8090 --enroll 123456789.wav --test 85236145389.wav
-```
+```
--- a/paddlespeech/server/engine/vector/__init__.py
+++ b/paddlespeech/server/engine/vector/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- a/paddlespeech/server/engine/vector/python/__init__.py
+++ b/paddlespeech/server/engine/vector/python/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- a/paddlespeech/server/engine/vector/python/vector_engine.py
+++ b/paddlespeech/server/engine/vector/python/vector_engine.py
@@ -16,9 +16,9 @@ from collections import OrderedDict

 import numpy as np
 import paddle
-
 from paddleaudio.backends import load as load_audio
 from paddleaudio.compliance.librosa import melspectrogram
+
 from paddlespeech.cli.log import logger
 from paddlespeech.cli.vector.infer import VectorExecutor
 from paddlespeech.server.engine.base_engine import BaseEngine
--- a/speechx/examples/ds2_ol/decoder/recognizer_test_main.cc
+++ b/speechx/examples/ds2_ol/decoder/recognizer_test_main.cc
@@ -31,7 +31,7 @@ int main(int argc, char* argv[]) {
    kaldi::SequentialTableReader<kaldi::WaveHolder> wav_reader(
        FLAGS_wav_rspecifier);
    kaldi::TokenWriter result_writer(FLAGS_result_wspecifier);
-    
+
    int sample_rate = FLAGS_sample_rate;
    float streaming_chunk = FLAGS_streaming_chunk;
    int chunk_sample_size = streaming_chunk * sample_rate;
--- a/speechx/speechx/decoder/param.h
+++ b/speechx/speechx/decoder/param.h
@@ -21,7 +21,8 @@

 // feature
 DEFINE_bool(use_fbank, false, "False for fbank; or linear feature");
-// DEFINE_bool(to_float32, true, "audio convert to pcm32. True for linear feature, or fbank");
+// DEFINE_bool(to_float32, true, "audio convert to pcm32. True for linear
+// feature, or fbank");
 DEFINE_int32(num_bins, 161, "num bins of mel");
 DEFINE_string(cmvn_file, "", "read cmvn");
 DEFINE_double(streaming_chunk, 0.1, "streaming feature chunk size");
@@ -67,18 +68,18 @@ FeaturePipelineOptions InitFeaturePipelineOptions() {
    frame_opts.frame_shift_ms = 10;
    opts.use_fbank = FLAGS_use_fbank;
    if (opts.use_fbank) {
-      opts.to_float32 = false;
-      frame_opts.window_type = "povey";
-      frame_opts.frame_length_ms = 25;
-      opts.fbank_opts.fbank_opts.mel_opts.num_bins = FLAGS_num_bins;
-      opts.fbank_opts.fbank_opts.frame_opts = frame_opts;
+        opts.to_float32 = false;
+        frame_opts.window_type = "povey";
+        frame_opts.frame_length_ms = 25;
+        opts.fbank_opts.fbank_opts.mel_opts.num_bins = FLAGS_num_bins;
+        opts.fbank_opts.fbank_opts.frame_opts = frame_opts;
    } else {
-      opts.to_float32 = true;
-      frame_opts.remove_dc_offset = false;
-      frame_opts.frame_length_ms = 20;
-      frame_opts.window_type = "hanning";
-      frame_opts.preemph_coeff = 0.0;
-      opts.linear_spectrogram_opts.frame_opts = frame_opts;
+        opts.to_float32 = true;
+        frame_opts.remove_dc_offset = false;
+        frame_opts.frame_length_ms = 20;
+        frame_opts.window_type = "hanning";
+        frame_opts.preemph_coeff = 0.0;
+        opts.linear_spectrogram_opts.frame_opts = frame_opts;
    }
    opts.feature_cache_opts.frame_chunk_size = FLAGS_receptive_field_length;
    opts.feature_cache_opts.frame_chunk_stride = FLAGS_downsampling_rate;
--- a/speechx/speechx/frontend/audio/fbank.cc
+++ b/speechx/speechx/frontend/audio/fbank.cc
@@ -102,13 +102,16 @@ bool Fbank::Compute(const Vector<BaseFloat>& waves, Vector<BaseFloat>* feats) {
        // note: this online feature-extraction code does not support VTLN.
        RealFft(&window, true);
        kaldi::ComputePowerSpectrum(&window);
-        const kaldi::MelBanks &mel_bank = *(computer_.GetMelBanks(1.0));
-        SubVector<BaseFloat> power_spectrum(window, 0, window.Dim() / 2 + 1); 
+        const kaldi::MelBanks& mel_bank = *(computer_.GetMelBanks(1.0));
+        SubVector<BaseFloat> power_spectrum(window, 0, window.Dim() / 2 + 1);
        if (!opts_.fbank_opts.use_power) {
            power_spectrum.ApplyPow(0.5);
        }
-        int32 mel_offset = ((opts_.fbank_opts.use_energy && !opts_.fbank_opts.htk_compat) ? 1 : 0);
-        SubVector<BaseFloat> mel_energies(this_feature, mel_offset, opts_.fbank_opts.mel_opts.num_bins);
+        int32 mel_offset =
+            ((opts_.fbank_opts.use_energy && !opts_.fbank_opts.htk_compat) ? 1
+                                                                           : 0);
+        SubVector<BaseFloat> mel_energies(
+            this_feature, mel_offset, opts_.fbank_opts.mel_opts.num_bins);
        mel_bank.Compute(power_spectrum, &mel_energies);
        mel_energies.ApplyFloor(1e-07);
        mel_energies.ApplyLog();
--- a/speechx/speechx/frontend/audio/feature_pipeline.cc
+++ b/speechx/speechx/frontend/audio/feature_pipeline.cc
@@ -23,13 +23,13 @@ FeaturePipeline::FeaturePipeline(const FeaturePipelineOptions& opts) {
        new ppspeech::AudioCache(1000 * kint16max, opts.to_float32));

    unique_ptr<FrontendInterface> base_feature;
-    
+
    if (opts.use_fbank) {
-        base_feature.reset(new ppspeech::Fbank(opts.fbank_opts,
-                              std::move(data_source)));
+        base_feature.reset(
+            new ppspeech::Fbank(opts.fbank_opts, std::move(data_source)));
    } else {
-        base_feature.reset(new ppspeech::LinearSpectrogram(opts.linear_spectrogram_opts,
-                              std::move(data_source)));
+        base_feature.reset(new ppspeech::LinearSpectrogram(
+            opts.linear_spectrogram_opts, std::move(data_source)));
    }

    unique_ptr<FrontendInterface> cmvn(
--- a/speechx/speechx/frontend/audio/feature_pipeline.h
+++ b/speechx/speechx/frontend/audio/feature_pipeline.h
@@ -18,24 +18,24 @@

 #include "frontend/audio/audio_cache.h"
 #include "frontend/audio/data_cache.h"
+#include "frontend/audio/fbank.h"
 #include "frontend/audio/feature_cache.h"
 #include "frontend/audio/frontend_itf.h"
 #include "frontend/audio/linear_spectrogram.h"
-#include "frontend/audio/fbank.h"
 #include "frontend/audio/normalizer.h"

 namespace ppspeech {

 struct FeaturePipelineOptions {
    std::string cmvn_file;
-    bool to_float32; // true, only for linear feature
+    bool to_float32;  // true, only for linear feature
    bool use_fbank;
    LinearSpectrogramOptions linear_spectrogram_opts;
    FbankOptions fbank_opts;
    FeatureCacheOptions feature_cache_opts;
    FeaturePipelineOptions()
        : cmvn_file(""),
-          to_float32(false), // true, only for linear feature
+          to_float32(false),  // true, only for linear feature
          use_fbank(true),
          linear_spectrogram_opts(),
          fbank_opts(),