diff --git a/demos/speech_web/README.md b/demos/speech_web/README.md index e8c59ea8b..676611778 100644 --- a/demos/speech_web/README.md +++ b/demos/speech_web/README.md @@ -44,6 +44,8 @@ cd ../ ### 前端环境安装 前端依赖 `node.js` ,需要提前安装,确保 `npm` 可用,`npm` 测试版本 `8.3.1`,建议下载[官网](https://nodejs.org/en/)稳定版的 `node.js` +如果因为网络问题,无法下载依赖库,可以参考 FAQ 部分,`npm / yarn 配置淘宝镜像源` + ```bash # 进入前端目录 cd web_client @@ -172,8 +174,19 @@ cd web_client yarn dev --port 8011 ``` -默认配置下,前端中配置的后台地址信息是 localhost,确保后端服务器和打开页面的游览器在同一台机器上,不在一台机器的配置方式见下方的 FAQ:【后端如果部署在其它机器或者别的端口如何修改】 +默认配置下,前端配置的后台地址信息是 `localhost`,确保后端服务器和打开页面的浏览器在同一台机器上,不在一台机器的配置方式见下方的 FAQ:【后端如果部署在其它机器或者别的端口如何修改】 + +#### 关于前端的一些说明 + +为了方便后期的维护,这里并没有给出打包好的 HTML 文件,而是 Vue3 的项目,使用 `yarn dev --port 8011` 的方式启动测试,方便大家 debug,相当于是启动了一个前端服务器。 + +比如我们在本机启动的这个前端服务(运行 `yarn dev --port 8011` ),我们就可以在浏览器中通过 `http://localhost:8011` 访问前端页面 + +如果我们在其它服务器上(例如:`*.*.*.*` )启动这个前端服务(运行 `yarn dev --port 8011` ),我们就可以在浏览器中通过 `http://*.*.*.*:8011` 访问前端页面 + +那前端跟后端是什么关系呢? 两个是独立的,只要前端能够通过代理访问到后端的接口,那就没有问题。你可以在 A 机器上部署后端服务,然后在 B 机器上部署前端服务。我们在 `./web_client/vite.config.js` 中将 `/api` 映射到的是 `http://localhost:8010`,你可以把它配置成任意你想要访问的后端地址。 +当前端以 `*.*.*.*` 这类 IP 地址的形式访问时,由于浏览器的安全限制,会禁止录音,需要重新配置浏览器的安全策略, 可以看下面 FAQ 部分: 【前端以IP地址的形式访问,无法录音】 ## FAQ @@ -210,12 +223,24 @@ ASR_SOCKET_RECORD: 'ws://localhost:8010/ws/asr/onlineStream', // Stream ASR 接 TTS_SOCKET_RECORD: 'ws://localhost:8010/ws/tts/online', // Stream TTS 接口 ``` -#### Q:后端以IP地址的形式,前端无法录音 +#### Q:前端以IP地址的形式访问,无法录音 A:这里主要是游览器安全策略的限制,需要配置游览器后重启。游览器修改配置可参考[使用js-audio-recorder报浏览器不支持getUserMedia](https://blog.csdn.net/YRY_LIKE_YOU/article/details/113745273) chrome设置地址: chrome://flags/#unsafely-treat-insecure-origin-as-secure +#### Q: npm / yarn 配置淘宝镜像源 + +A: 配置淘宝镜像源,详细可以参考 [【yarn npm 设置淘宝镜像】](https://www.jianshu.com/p/f6f43e8f9d6b) + +```bash +# npm 配置淘宝镜像源 +npm config set registry https://registry.npmmirror.com + +# yarn 配置淘宝镜像源 +yarn config set registry https://registry.npmmirror.com +``` + ## 参考资料 
vue实现录音参考资料:https://blog.csdn.net/qq_41619796/article/details/107865602#t1 diff --git a/demos/speech_web/speech_server/src/ernie_sat.py b/demos/speech_web/speech_server/src/ernie_sat.py index b74dd8e3f..02e1ed9d9 100644 --- a/demos/speech_web/speech_server/src/ernie_sat.py +++ b/demos/speech_web/speech_server/src/ernie_sat.py @@ -1,5 +1,6 @@ import os +from .util import get_ngpu from .util import MAIN_ROOT from .util import run_cmd @@ -171,6 +172,7 @@ class SAT: output_name: str, source_lang: str, target_lang: str): + ngpu = get_ngpu() cmd = f""" FLAGS_allocator_strategy=naive_best_fit \ FLAGS_fraction_of_gpu_memory_to_use=0.01 \ @@ -189,7 +191,8 @@ class SAT: --voc_config={voc_config} \ --voc_ckpt={voc_ckpt} \ --voc_stat={voc_stat} \ - --output_name={output_name} + --output_name={output_name} \ + --ngpu={ngpu} """ return cmd diff --git a/demos/speech_web/speech_server/src/finetune.py b/demos/speech_web/speech_server/src/finetune.py index d7a440f9a..6ca99251b 100644 --- a/demos/speech_web/speech_server/src/finetune.py +++ b/demos/speech_web/speech_server/src/finetune.py @@ -1,5 +1,6 @@ import os +from .util import get_ngpu from .util import MAIN_ROOT from .util import run_cmd @@ -38,7 +39,7 @@ class FineTune: dump_dir = os.path.join(exp_dir, 'dump') output_dir = os.path.join(exp_dir, 'exp') lang = "zh" - ngpu = 1 + ngpu = get_ngpu() cmd = f""" # check oov @@ -91,7 +92,7 @@ class FineTune: output_dir = os.path.join(exp_dir, 'exp') text_path = os.path.join(exp_dir, 'sentences.txt') lang = "zh" - ngpu = 1 + ngpu = get_ngpu() model_path = f"{output_dir}/checkpoints" ckpt = find_max_ckpt(model_path) @@ -117,7 +118,8 @@ class FineTune: --output_dir={out_wav_dir} \ --phones_dict={dump_dir}/phone_id_map.txt \ --speaker_dict={dump_dir}/speaker_id_map.txt \ - --spk_id=0 + --spk_id=0 \ + --ngpu={ngpu} """ out_path = os.path.join(out_wav_dir, f"{wav_name}.wav") diff --git a/demos/speech_web/speech_server/src/ge2e_clone.py b/demos/speech_web/speech_server/src/ge2e_clone.py 
index d90013b98..83c2b3f35 100644 --- a/demos/speech_web/speech_server/src/ge2e_clone.py +++ b/demos/speech_web/speech_server/src/ge2e_clone.py @@ -1,6 +1,7 @@ import os import shutil +from .util import get_ngpu from .util import MAIN_ROOT from .util import run_cmd @@ -30,11 +31,12 @@ class VoiceCloneGE2E(): ref_audio_dir = os.path.realpath("tmp_dir/ge2e") if os.path.exists(ref_audio_dir): shutil.rmtree(ref_audio_dir) - else: - os.makedirs(ref_audio_dir, exist_ok=True) - shutil.copy(input_wav, ref_audio_dir) + + os.makedirs(ref_audio_dir, exist_ok=True) + shutil.copy(input_wav, ref_audio_dir) output_dir = os.path.dirname(out_wav) + ngpu = get_ngpu() cmd = f""" python3 {self.BIN_DIR}/voice_cloning.py \ @@ -50,7 +52,8 @@ class VoiceCloneGE2E(): --text="{text}" \ --input-dir={ref_audio_dir} \ --output-dir={output_dir} \ - --phones-dict={self.phones_dict} + --phones-dict={self.phones_dict} \ + --ngpu={ngpu} """ output_name = os.path.join(output_dir, full_file_name) diff --git a/demos/speech_web/speech_server/src/tdnn_clone.py b/demos/speech_web/speech_server/src/tdnn_clone.py index c24b9b077..53c5a3816 100644 --- a/demos/speech_web/speech_server/src/tdnn_clone.py +++ b/demos/speech_web/speech_server/src/tdnn_clone.py @@ -1,6 +1,7 @@ import os import shutil +from .util import get_ngpu from .util import MAIN_ROOT from .util import run_cmd @@ -27,11 +28,11 @@ class VoiceCloneTDNN(): ref_audio_dir = os.path.realpath("tmp_dir/tdnn") if os.path.exists(ref_audio_dir): shutil.rmtree(ref_audio_dir) - else: - os.makedirs(ref_audio_dir, exist_ok=True) - shutil.copy(input_wav, ref_audio_dir) + os.makedirs(ref_audio_dir, exist_ok=True) + shutil.copy(input_wav, ref_audio_dir) output_dir = os.path.dirname(out_wav) + ngpu = get_ngpu() cmd = f""" python3 {self.BIN_DIR}/voice_cloning.py \ @@ -47,7 +48,8 @@ class VoiceCloneTDNN(): --input-dir={ref_audio_dir} \ --output-dir={output_dir} \ --phones-dict={self.phones_dict} \ - --use_ecapa=True + --use_ecapa=True \ + --ngpu={ngpu} """ 
output_name = os.path.join(output_dir, full_file_name) diff --git a/demos/speech_web/speech_server/src/util.py b/demos/speech_web/speech_server/src/util.py index a69e6c42f..0188f0280 100644 --- a/demos/speech_web/speech_server/src/util.py +++ b/demos/speech_web/speech_server/src/util.py @@ -2,10 +2,19 @@ import os import random import subprocess +import paddle + NOW_FILE_PATH = os.path.dirname(__file__) MAIN_ROOT = os.path.realpath(os.path.join(NOW_FILE_PATH, "../../../../")) +def get_ngpu(): + if paddle.device.get_device() == "cpu": + return 0 + else: + return 1 + + def randName(n=5): return "".join(random.sample('zyxwvutsrqponmlkjihgfedcba', n)) diff --git a/demos/speech_web/speech_server/vc.py b/demos/speech_web/speech_server/vc.py index 99e56b404..d035c02a4 100644 --- a/demos/speech_web/speech_server/vc.py +++ b/demos/speech_web/speech_server/vc.py @@ -281,15 +281,18 @@ async def VcCloneG2P(base: VcBaseText): if base.func == 'ge2e': wavName = base.wavName wavPath = os.path.join(VC_OUT_PATH, wavName) - vc_model.vc( + wavPath = vc_model.vc( text=base.text, input_wav=base.wavPath, out_wav=wavPath) else: wavName = base.wavName wavPath = os.path.join(VC_OUT_PATH, wavName) - vc_model_tdnn.vc( + wavPath = vc_model_tdnn.vc( text=base.text, input_wav=base.wavPath, out_wav=wavPath) - res = {"wavName": wavName, "wavPath": wavPath} - return SuccessRequest(result=res) + if wavPath: + res = {"wavName": wavName, "wavPath": wavPath} + return SuccessRequest(result=res) + else: + return ErrorRequest(message="克隆失败,检查克隆脚本是否有效") except Exception as e: print(e) return ErrorRequest(message="克隆失败,合成过程报错") diff --git a/demos/speech_web/web_client/src/components/Experience.vue b/demos/speech_web/web_client/src/components/Experience.vue index 4f32faf95..f593c0c14 100644 --- a/demos/speech_web/web_client/src/components/Experience.vue +++ b/demos/speech_web/web_client/src/components/Experience.vue @@ -47,7 +47,7 @@ import FineTuneT from './SubMenu/FineTune/FineTune.vue' - + diff --git 
a/demos/speech_web/web_client/src/components/SubMenu/ASR/RealTime/RealTime.vue b/demos/speech_web/web_client/src/components/SubMenu/ASR/RealTime/RealTime.vue index 761a5c11f..5494bb8f8 100644 --- a/demos/speech_web/web_client/src/components/SubMenu/ASR/RealTime/RealTime.vue +++ b/demos/speech_web/web_client/src/components/SubMenu/ASR/RealTime/RealTime.vue @@ -58,9 +58,6 @@ export default { mounted () { this.wsUrl = apiURL.ASR_SOCKET_RECORD this.ws = new WebSocket(this.wsUrl) - if(this.ws.readyState === this.ws.CONNECTING){ - this.$message.success("实时识别 Websocket 连接成功") - } var _that = this this.ws.addEventListener('message', function (event) { var temp = JSON.parse(event.data); @@ -78,7 +75,7 @@ export default { // 检查 websocket 状态 // debugger if(this.ws.readyState != this.ws.OPEN){ - this.$message.error("websocket 链接失败,请检查链接地址是否正确") + this.$message.error("websocket 链接失败,请检查 Websocket 后端服务是否正确开启") return } diff --git a/demos/speech_web/web_client/src/components/SubMenu/ChatBot/Chat.vue b/demos/speech_web/web_client/src/components/SubMenu/ChatBot/Chat.vue deleted file mode 100644 index 9d356fc80..000000000 --- a/demos/speech_web/web_client/src/components/SubMenu/ChatBot/Chat.vue +++ /dev/null @@ -1,298 +0,0 @@ - - - 语音聊天 - - {{ recoText }} - - {{ envText }} - - 清空聊天 - - - - - {{Result}} - - - - - - - - \ No newline at end of file diff --git a/demos/speech_web/web_client/src/components/SubMenu/ChatBot/ChatT.vue b/demos/speech_web/web_client/src/components/SubMenu/ChatBot/ChatT.vue index c37c083ff..6db847706 100644 --- a/demos/speech_web/web_client/src/components/SubMenu/ChatBot/ChatT.vue +++ b/demos/speech_web/web_client/src/components/SubMenu/ChatBot/ChatT.vue @@ -91,6 +91,10 @@ export default { methods: { // 开始录音 startRecorder(){ + if(this.ws.readyState != this.ws.OPEN){ + this.$message.error("websocket 链接失败,请检查 Websocket 后端服务是否正确开启") + return + } this.allResultList = [] if(!this.onReco){ this.asrResult = this.speakingText diff --git 
a/demos/speech_web/web_client/src/components/SubMenu/ENIRE_SAT/ENIRE_SAT.vue b/demos/speech_web/web_client/src/components/SubMenu/ENIRE_SAT/ENIRE_SAT.vue index e1a4f2343..4a0aa2c63 100644 --- a/demos/speech_web/web_client/src/components/SubMenu/ENIRE_SAT/ENIRE_SAT.vue +++ b/demos/speech_web/web_client/src/components/SubMenu/ENIRE_SAT/ENIRE_SAT.vue @@ -98,7 +98,7 @@ 播放 - 播放 + 播放 下载 下载 diff --git a/demos/speech_web/web_client/src/components/SubMenu/FineTune/FineTune.vue b/demos/speech_web/web_client/src/components/SubMenu/FineTune/FineTune.vue index 895dd586d..4f3791fc6 100644 --- a/demos/speech_web/web_client/src/components/SubMenu/FineTune/FineTune.vue +++ b/demos/speech_web/web_client/src/components/SubMenu/FineTune/FineTune.vue @@ -80,7 +80,7 @@ - 播放 + 播放 播放 下载 下载 diff --git a/demos/speech_web/web_client/src/components/SubMenu/IE/IE.vue b/demos/speech_web/web_client/src/components/SubMenu/IE/IE.vue deleted file mode 100644 index c7dd04e9d..000000000 --- a/demos/speech_web/web_client/src/components/SubMenu/IE/IE.vue +++ /dev/null @@ -1,125 +0,0 @@ - - - 信息抽取体验 - {{ recoText }} - 识别结果: {{ asrResultOffline }} - 时间:{{ time }} - 出发地:{{ outset }} - 目的地:{{ destination }} - 费用:{{ amount }} - - - - - - - \ No newline at end of file diff --git a/demos/speech_web/web_client/src/components/SubMenu/TTS/TTST.vue b/demos/speech_web/web_client/src/components/SubMenu/TTS/TTST.vue index 353221f7b..ef5591783 100644 --- a/demos/speech_web/web_client/src/components/SubMenu/TTS/TTST.vue +++ b/demos/speech_web/web_client/src/components/SubMenu/TTS/TTST.vue @@ -228,6 +228,10 @@ export default { }, // 基于WS的流式合成 async getTtsChunkWavWS(){ + if(this.ws.readyState != this.ws.OPEN){ + this.$message.error("websocket 链接失败,请检查 Websocket 后端服务是否正确开启") + return + } // 初始化 chunks chunks = [] chunk_index = 0 diff --git a/demos/speech_web/web_client/src/components/SubMenu/VPR/VPR.vue b/demos/speech_web/web_client/src/components/SubMenu/VPR/VPR.vue deleted file mode 100644 index 1fe71e4d8..000000000 
--- a/demos/speech_web/web_client/src/components/SubMenu/VPR/VPR.vue +++ /dev/null @@ -1,178 +0,0 @@ - - - - 声纹识别展示 - - {{ recoText }} - 注册 - 识别 - - - 声纹得分结果 - - - - - - - 声纹数据列表 - - - - - - - - - - - - Delete - - - - - - - - - - - - - \ No newline at end of file diff --git a/demos/speech_web/web_client/src/components/SubMenu/VPR/VPRT.vue b/demos/speech_web/web_client/src/components/SubMenu/VPR/VPRT.vue index e398da00c..47eb41df5 100644 --- a/demos/speech_web/web_client/src/components/SubMenu/VPR/VPRT.vue +++ b/demos/speech_web/web_client/src/components/SubMenu/VPR/VPRT.vue @@ -214,14 +214,17 @@ export default { let formData = new FormData() formData.append('spk_id', this.enrollSpkId) formData.append('audio', this.wav) - + const result = await vprEnroll(formData) + if (!result){ + this.$message.error("请检查后端服务是否正确开启") + return + } if(result.data.status){ this.$message.success("声纹注册成功") } else { this.$message.error(result.data.msg) } - // console.log(result) this.GetList() this.wav = '' this.randomSpkId() diff --git a/demos/speech_web/web_client/src/components/SubMenu/VoiceClone/VoiceClone.vue b/demos/speech_web/web_client/src/components/SubMenu/VoiceClone/VoiceClone.vue index 1e380d288..afa572417 100644 --- a/demos/speech_web/web_client/src/components/SubMenu/VoiceClone/VoiceClone.vue +++ b/demos/speech_web/web_client/src/components/SubMenu/VoiceClone/VoiceClone.vue @@ -71,7 +71,7 @@ - 播放 + 播放 播放 下载 下载 @@ -270,6 +270,7 @@ export default { } else if (this.nowIndex >= this.vcDatas.length){ return this.$message.error("当前序号不可以超过音频个数") } + this.cloneWav = "" let func = '' if(this.func_radio === '1'){ func = 'ge2e' @@ -289,12 +290,12 @@ export default { } ); this.g2pOnSys = 0 - if(!result.data.code){ + if(result.data.code == 0){ this.cloneWav = result.data.result console.log("clone wav: ", this.cloneWav) - this.$message.success("音色克隆成功") + this.$message.success("音频合成成功") } else { - this.$message.error(result.data.msg) + this.$message.error("音频合成失败,请检查后台错误后重试!") } }, // 
播放表格