remove unuseful code, test=doc

pull/1710/head
xiongxinlei 3 years ago
parent 89b102a7dd
commit efc269b75f

@ -1,11 +0,0 @@
#!/bin/bash
# wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav
# asr
export CUDA_VISIBLE_DEVICES=0
paddlespeech asr --input audio/119994.wav -v
# asr + punc
# paddlespeech asr --input ./zh.wav | paddlespeech text --task punc

@ -5,7 +5,7 @@ process:
n_mels: 80 n_mels: 80
n_shift: 160 n_shift: 160
win_length: 400 win_length: 400
dither: 0.0 dither: 0.1
- type: cmvn_json - type: cmvn_json
cmvn_path: data/mean_std.json cmvn_path: data/mean_std.json
# these three processes are a.k.a. SpecAugment # these three processes are a.k.a. SpecAugment

@ -3,9 +3,9 @@ decode_batch_size: 128
error_rate_type: cer error_rate_type: cer
decoding_method: attention # 'attention', 'ctc_greedy_search', 'ctc_prefix_beam_search', 'attention_rescoring' decoding_method: attention # 'attention', 'ctc_greedy_search', 'ctc_prefix_beam_search', 'attention_rescoring'
ctc_weight: 0.5 # ctc weight for attention rescoring decode mode. ctc_weight: 0.5 # ctc weight for attention rescoring decode mode.
decoding_chunk_size: 1 # decoding chunk size. Defaults to -1. decoding_chunk_size: -1 # decoding chunk size. Defaults to -1.
# <0: for decoding, use full chunk. # <0: for decoding, use full chunk.
# >0: for decoding, use fixed chunk size as set. # >0: for decoding, use fixed chunk size as set.
# 0: used for training, it's prohibited here. # 0: used for training, it's prohibited here.
num_decoding_left_chunks: -1 # number of left chunks for decoding. Defaults to -1. num_decoding_left_chunks: -1 # number of left chunks for decoding. Defaults to -1.
simulate_streaming: True # simulate streaming inference. Defaults to False. simulate_streaming: False # simulate streaming inference. Defaults to False.

@ -3,12 +3,12 @@ source path.sh
set -e set -e
gpus=0,1,2,3 gpus=0,1,2,3
stage=5 stage=0
stop_stage=5 stop_stage=50
conf_path=conf/chunk_conformer.yaml conf_path=conf/conformer.yaml
decode_conf_path=conf/tuning/decode.yaml decode_conf_path=conf/tuning/decode.yaml
avg_num=20 avg_num=20
audio_file=audio/zh.wav audio_file=data/demo_01_03.wav
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1; source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;
@ -44,7 +44,7 @@ fi
# Optionally, you can add LM and test it with runtime. # Optionally, you can add LM and test it with runtime.
if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
# test a single .wav file # test a single .wav file
CUDA_VISIBLE_DEVICES=0 ./local/test_wav.sh ${conf_path} ${decode_conf_path} exp/chunk_conformer/checkpoints/multi_cn ${audio_file} || exit -1 CUDA_VISIBLE_DEVICES=0 ./local/test_wav.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} ${audio_file} || exit -1
fi fi
# Not supported for now!!! # Not supported for now!!!

@ -14,6 +14,3 @@
import _locale import _locale
_locale._getdefaultlocale = (lambda *args: ['en_US', 'utf8']) _locale._getdefaultlocale = (lambda *args: ['en_US', 'utf8'])

@ -130,10 +130,8 @@ if __name__ == "__main__":
config = CfgNode(new_allowed=True) config = CfgNode(new_allowed=True)
if args.config: if args.config:
print(f"load config: {args.config}")
config.merge_from_file(args.config) config.merge_from_file(args.config)
if args.decode_cfg: if args.decode_cfg:
print(f"load decode cfg: {args.decode_cfg}")
decode_confs = CfgNode(new_allowed=True) decode_confs = CfgNode(new_allowed=True)
decode_confs.merge_from_file(args.decode_cfg) decode_confs.merge_from_file(args.decode_cfg)
config.decode = decode_confs config.decode = decode_confs

@ -4,7 +4,7 @@
# SERVER SETTING # # SERVER SETTING #
################################################################################# #################################################################################
host: 0.0.0.0 host: 0.0.0.0
port: 8096 port: 8090
# The task format in the engine_list is: <speech task>_<engine type> # The task format in the engine_list is: <speech task>_<engine type>
# task choices = ['asr_online', 'tts_online'] # task choices = ['asr_online', 'tts_online']

@ -105,7 +105,7 @@ class ASRAudioHandler:
def main(args): def main(args):
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
logging.info("asr websocket client start") logging.info("asr websocket client start")
handler = ASRAudioHandler("127.0.0.1", 8096) handler = ASRAudioHandler("127.0.0.1", 8090)
loop = asyncio.get_event_loop() loop = asyncio.get_event_loop()
# support to process single audio file # support to process single audio file

@ -93,7 +93,6 @@ async def websocket_endpoint(websocket: WebSocket):
sample_rate = asr_engine.config.sample_rate sample_rate = asr_engine.config.sample_rate
x_chunk, x_chunk_lens = asr_engine.preprocess(samples, x_chunk, x_chunk_lens = asr_engine.preprocess(samples,
sample_rate) sample_rate)
print(x_chunk_lens)
asr_engine.run(x_chunk, x_chunk_lens) asr_engine.run(x_chunk, x_chunk_lens)
asr_results = asr_engine.postprocess() asr_results = asr_engine.postprocess()
asr_results = asr_engine.postprocess() asr_results = asr_engine.postprocess()

@ -18,11 +18,11 @@ A few sklearn functions are modified in this script as per requirement.
""" """
import argparse import argparse
import warnings import warnings
from distutils.util import strtobool
import numpy as np import numpy as np
import scipy import scipy
import sklearn import sklearn
from distutils.util import strtobool
from scipy import sparse from scipy import sparse
from scipy.sparse.csgraph import connected_components from scipy.sparse.csgraph import connected_components
from scipy.sparse.csgraph import laplacian as csgraph_laplacian from scipy.sparse.csgraph import laplacian as csgraph_laplacian

@ -168,7 +168,7 @@ class DevelopCommand(develop):
def run(self): def run(self):
develop.run(self) develop.run(self)
# must after develop.run, or pkg install by shell will not see # must after develop.run, or pkg install by shell will not see
# self.execute(_post_install, (self.install_lib, ), msg="Post Install...") self.execute(_post_install, (self.install_lib, ), msg="Post Install...")
class InstallCommand(install): class InstallCommand(install):

@ -26,9 +26,9 @@ import argparse
import os import os
import re import re
import subprocess import subprocess
from distutils.util import strtobool
import numpy as np import numpy as np
from distutils.util import strtobool
FILE_IDS = re.compile(r"(?<=Speaker Diarization for).+(?=\*\*\*)") FILE_IDS = re.compile(r"(?<=Speaker Diarization for).+(?=\*\*\*)")
SCORED_SPEAKER_TIME = re.compile(r"(?<=SCORED SPEAKER TIME =)[\d.]+") SCORED_SPEAKER_TIME = re.compile(r"(?<=SCORED SPEAKER TIME =)[\d.]+")

Loading…
Cancel
Save