diff --git a/demos/speech_recognition/run.sh b/demos/speech_recognition/run.sh
deleted file mode 100755
index a9ae937d2..000000000
--- a/demos/speech_recognition/run.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/bash
-
-# wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav
-
-# asr
-export CUDA_VISIBLE_DEVICES=0
-paddlespeech asr --input audio/119994.wav -v
-
-
-# asr + punc
-# paddlespeech asr --input ./zh.wav | paddlespeech text --task punc
\ No newline at end of file
diff --git a/examples/aishell/asr1/conf/preprocess.yaml b/examples/aishell/asr1/conf/preprocess.yaml
index 6fccd1954..d3992cb9f 100644
--- a/examples/aishell/asr1/conf/preprocess.yaml
+++ b/examples/aishell/asr1/conf/preprocess.yaml
@@ -5,7 +5,7 @@ process:
     n_mels: 80
     n_shift: 160
     win_length: 400
-    dither: 0.0
+    dither: 0.1
   - type: cmvn_json
     cmvn_path: data/mean_std.json
   # these three processes are a.k.a. SpecAugument
diff --git a/examples/aishell/asr1/conf/tuning/decode.yaml b/examples/aishell/asr1/conf/tuning/decode.yaml
index f0a5ba6b5..72ede9272 100644
--- a/examples/aishell/asr1/conf/tuning/decode.yaml
+++ b/examples/aishell/asr1/conf/tuning/decode.yaml
@@ -3,9 +3,9 @@ decode_batch_size: 128
 error_rate_type: cer
 decoding_method: attention # 'attention', 'ctc_greedy_search', 'ctc_prefix_beam_search', 'attention_rescoring'
 ctc_weight: 0.5 # ctc weight for attention rescoring decode mode.
-decoding_chunk_size: 1 # decoding chunk size. Defaults to -1.
+decoding_chunk_size: -1 # decoding chunk size. Defaults to -1.
                         # <0: for decoding, use full chunk.
                         # >0: for decoding, use fixed chunk size as set.
                         # 0: used for training, it's prohibited here.
 num_decoding_left_chunks: -1 # number of left chunks for decoding. Defaults to -1.
-simulate_streaming: True # simulate streaming inference. Defaults to False.
+simulate_streaming: False # simulate streaming inference. Defaults to False.
diff --git a/examples/aishell/asr1/run.sh b/examples/aishell/asr1/run.sh
index be7116a75..c54dae9cf 100644
--- a/examples/aishell/asr1/run.sh
+++ b/examples/aishell/asr1/run.sh
@@ -3,12 +3,12 @@ source path.sh
 set -e
 
 gpus=0,1,2,3
-stage=5
-stop_stage=5
-conf_path=conf/chunk_conformer.yaml
+stage=0
+stop_stage=50
+conf_path=conf/conformer.yaml
 decode_conf_path=conf/tuning/decode.yaml
 avg_num=20
-audio_file=audio/zh.wav
+audio_file=data/demo_01_03.wav
 
 source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;
 
@@ -44,7 +44,7 @@ fi
 # Optionally, you can add LM and test it with runtime.
 if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
     # test a single .wav file
-    CUDA_VISIBLE_DEVICES=0 ./local/test_wav.sh ${conf_path} ${decode_conf_path} exp/chunk_conformer/checkpoints/multi_cn ${audio_file} || exit -1
+    CUDA_VISIBLE_DEVICES=0 ./local/test_wav.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} ${audio_file} || exit -1
 fi
 
 # Not supported at now!!!
diff --git a/paddlespeech/__init__.py b/paddlespeech/__init__.py
index 92c1df7c4..b781c4a8e 100644
--- a/paddlespeech/__init__.py
+++ b/paddlespeech/__init__.py
@@ -14,6 +14,3 @@
 import _locale
 
 _locale._getdefaultlocale = (lambda *args: ['en_US', 'utf8'])
-
-
-
diff --git a/paddlespeech/s2t/exps/u2/bin/test_wav.py b/paddlespeech/s2t/exps/u2/bin/test_wav.py
index 6bc86d8f8..86c3db89f 100644
--- a/paddlespeech/s2t/exps/u2/bin/test_wav.py
+++ b/paddlespeech/s2t/exps/u2/bin/test_wav.py
@@ -128,12 +128,10 @@ if __name__ == "__main__":
     args = parser.parse_args()
 
     config = CfgNode(new_allowed=True)
-    
+
     if args.config:
-        print(f"load config: {args.config}")
         config.merge_from_file(args.config)
     if args.decode_cfg:
-        print(f"load decode cfg: {args.decode_cfg}")
         decode_confs = CfgNode(new_allowed=True)
         decode_confs.merge_from_file(args.decode_cfg)
         config.decode = decode_confs
diff --git a/paddlespeech/server/conf/ws_application.yaml b/paddlespeech/server/conf/ws_application.yaml
index b2eaf5001..b958bdf69 100644
--- a/paddlespeech/server/conf/ws_application.yaml
+++ b/paddlespeech/server/conf/ws_application.yaml
@@ -4,7 +4,7 @@
 #                             SERVER SETTING                                    #
 #################################################################################
 host: 0.0.0.0
-port: 8096
+port: 8090
 
 # The task format in the engin_list is: <speech task>_<engine type>
 # task choices = ['asr_online', 'tts_online']
diff --git a/paddlespeech/server/tests/asr/online/websocket_client.py b/paddlespeech/server/tests/asr/online/websocket_client.py
index 58506606e..661eb4dd9 100644
--- a/paddlespeech/server/tests/asr/online/websocket_client.py
+++ b/paddlespeech/server/tests/asr/online/websocket_client.py
@@ -105,7 +105,7 @@ class ASRAudioHandler:
 def main(args):
     logging.basicConfig(level=logging.INFO)
     logging.info("asr websocket client start")
-    handler = ASRAudioHandler("127.0.0.1", 8096)
+    handler = ASRAudioHandler("127.0.0.1", 8090)
     loop = asyncio.get_event_loop()
 
     # support to process single audio file
diff --git a/paddlespeech/server/ws/asr_socket.py b/paddlespeech/server/ws/asr_socket.py
index 65c04f67f..ad4a1124e 100644
--- a/paddlespeech/server/ws/asr_socket.py
+++ b/paddlespeech/server/ws/asr_socket.py
@@ -93,7 +93,6 @@ async def websocket_endpoint(websocket: WebSocket):
 
                 sample_rate = asr_engine.config.sample_rate
                 x_chunk, x_chunk_lens = asr_engine.preprocess(samples, sample_rate)
-                print(x_chunk_lens)
                 asr_engine.run(x_chunk, x_chunk_lens)
                 asr_results = asr_engine.postprocess()
                 asr_results = asr_engine.postprocess()
diff --git a/paddlespeech/vector/cluster/diarization.py b/paddlespeech/vector/cluster/diarization.py
index 5b2157257..816ab0dee 100644
--- a/paddlespeech/vector/cluster/diarization.py
+++ b/paddlespeech/vector/cluster/diarization.py
@@ -18,11 +18,11 @@ A few sklearn functions are modified in this script as per requirement.
""" import argparse import warnings +from distutils.util import strtobool import numpy as np import scipy import sklearn -from distutils.util import strtobool from scipy import sparse from scipy.sparse.csgraph import connected_components from scipy.sparse.csgraph import laplacian as csgraph_laplacian diff --git a/setup.py b/setup.py index 9a8bb66bb..82ff63412 100644 --- a/setup.py +++ b/setup.py @@ -168,7 +168,7 @@ class DevelopCommand(develop): def run(self): develop.run(self) # must after develop.run, or pkg install by shell will not see - # self.execute(_post_install, (self.install_lib, ), msg="Post Install...") + self.execute(_post_install, (self.install_lib, ), msg="Post Install...") class InstallCommand(install): diff --git a/utils/DER.py b/utils/DER.py index 59bcbec47..d6ab695d8 100755 --- a/utils/DER.py +++ b/utils/DER.py @@ -26,9 +26,9 @@ import argparse import os import re import subprocess +from distutils.util import strtobool import numpy as np -from distutils.util import strtobool FILE_IDS = re.compile(r"(?<=Speaker Diarization for).+(?=\*\*\*)") SCORED_SPEAKER_TIME = re.compile(r"(?<=SCORED SPEAKER TIME =)[\d.]+")