From 06bea5f03d35f7e5fb5dfc1dbb51ff982b875a7e Mon Sep 17 00:00:00 2001 From: xiongxinlei Date: Mon, 9 May 2022 19:15:14 +0800 Subject: [PATCH] update the vector and text readme, test=doc --- demos/speech_server/README.md | 166 ++++++++++++++++++ demos/speech_server/README_cn.md | 160 +++++++++++++++++ demos/speech_server/conf/application.yaml | 25 ++- .../ws_conformer_wenetspeech_application.yaml | 46 +++++ paddlespeech/server/README.md | 21 +++ .../engine/asr/online/pretrained_models.py | 18 ++ 6 files changed, 435 insertions(+), 1 deletion(-) create mode 100644 demos/streaming_asr_server/conf/ws_conformer_wenetspeech_application.yaml diff --git a/demos/speech_server/README.md b/demos/speech_server/README.md index 3df93238..2334ec22 100644 --- a/demos/speech_server/README.md +++ b/demos/speech_server/README.md @@ -236,6 +236,166 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee ``` +### 7. Speaker Verification Client Usage + +#### 7.1 Extract speaker embedding +**Note:** The response time will be slightly longer when using the client for the first time +- Command Line (Recommended) + +``` bash +paddlespeech_client vector --task spk --server_ip 127.0.0.1 --port 8590 --input 85236145389.wav +``` + + * Usage: + + ``` bash + paddlespeech_client vector --help + ``` + + * Arguments: + * server_ip: server ip. Default: 127.0.0.1 + * port: server port. Default: 8090 + * input(required): Input text to generate. + * task: the task of vector, can be use 'spk' or 'score。Default is 'spk'。 + * enroll: enroll audio + * test: test audio + + * Output: + + ``` bash + [2022-05-08 00:18:44,249] [ INFO] - vector http client start + [2022-05-08 00:18:44,250] [ INFO] - the input audio: 85236145389.wav + [2022-05-08 00:18:44,250] [ INFO] - endpoint: http://127.0.0.1:8590/paddlespeech/vector + [2022-05-08 00:18:44,250] [ INFO] - http://127.0.0.1:8590/paddlespeech/vector + [2022-05-08 00:18:44,406] [ INFO] - The vector: {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'vec': [1.421751856803894, 5.626245498657227, -5.342077255249023, 1.1773887872695923, 3.3080549240112305, 1.7565933465957642, 5.167886257171631, 10.806358337402344, -3.8226819038391113, -5.614140033721924, 2.6238479614257812, -0.8072972893714905, 1.9635076522827148, -7.312870025634766, 0.011035939678549767, -9.723129272460938, 0.6619706153869629, -6.976806163787842, 10.213476181030273, 7.494769096374512, 2.9105682373046875, 3.8949244022369385, 3.799983501434326, 7.106168746948242, 16.90532875061035, -7.149388313293457, 8.733108520507812, 3.423006296157837, -4.831653594970703, -11.403363227844238, 11.232224464416504, 7.127461910247803, -4.282842636108398, 2.452359437942505, -5.130749702453613, -18.17766761779785, -2.6116831302642822, -11.000344276428223, -6.731433391571045, 1.6564682722091675, 0.7618281245231628, 1.125300407409668, -2.0838370323181152, 4.725743293762207, -8.782588005065918, -3.5398752689361572, 3.8142364025115967, 5.142068862915039, 2.1620609760284424, 4.09643030166626, -6.416214942932129, 12.747446060180664, 1.9429892301559448, -15.15294361114502, 6.417416095733643, 16.09701156616211, -9.716667175292969, -1.9920575618743896, -3.36494779586792, -1.8719440698623657, 11.567351341247559, 3.6978814601898193, 11.258262634277344, 7.442368507385254, 9.183408737182617, 4.528149127960205, -1.2417854070663452, 4.395912170410156, 6.6727728843688965, 5.88988733291626, 7.627128601074219, -0.6691966652870178, -11.889698028564453, -9.20886516571045, -7.42740535736084, -3.777663230895996, 6.917238712310791, -9.848755836486816, -2.0944676399230957, -5.1351165771484375, 0.4956451654434204, 9.317537307739258, -5.914181232452393, -1.809860348701477, -0.11738915741443634, -7.1692705154418945, -1.057827353477478, -5.721670627593994, -5.117385387420654, 16.13765525817871, -4.473617076873779, 7.6624321937561035, -0.55381840467453, 9.631585121154785, -6.470459461212158, -8.548508644104004, 4.371616840362549, -0.7970245480537415, 4.4789886474609375, -2.975860834121704, 3.2721822261810303, 2.838287830352783, 5.134591102600098, -9.19079875946045, -0.5657302737236023, -4.8745832443237305, 2.3165574073791504, -5.984319686889648, -2.1798853874206543, 0.3554139733314514, -0.3178512752056122, 9.493552207946777, 2.1144471168518066, 4.358094692230225, -12.089824676513672, 8.451693534851074, -7.925466537475586, 4.624246597290039, 4.428936958312988, 18.69200897216797, -2.6204581260681152, -5.14918851852417, -0.3582090139389038, 8.488558769226074, 4.98148775100708, -9.326835632324219, -2.2544219493865967, 6.641760349273682, 1.2119598388671875, 10.977124214172363, 16.555034637451172, 3.3238420486450195, 9.551861763000488, -1.6676981449127197, -0.7953944206237793, -8.605667114257812, -0.4735655188560486, 2.674196243286133, -5.359177112579346, -2.66738224029541, 0.6660683155059814, 15.44322681427002, 4.740593433380127, -3.472534418106079, 11.592567443847656, -2.0544962882995605, 1.736127495765686, -8.265326499938965, -9.30447769165039, 5.406829833984375, -1.518022894859314, -7.746612548828125, -6.089611053466797, 0.07112743705511093, -0.3490503430366516, -8.64989185333252, -9.998957633972168, -2.564845085144043, -0.5399947762489319, 2.6018123626708984, -0.3192799389362335, -1.8815255165100098, -2.0721492767333984, -3.410574436187744, -8.29980754852295, 1.483638048171997, -15.365986824035645, -8.288211822509766, 3.884779930114746, -3.4876468181610107, 7.362999439239502, 0.4657334089279175, 3.1326050758361816, 12.438895225524902, -1.8337041139602661, 4.532927989959717, 2.7264339923858643, 10.14534854888916, -6.521963596343994, 2.897155523300171, -3.392582654953003, 5.079153060913086, 7.7597246170043945, 4.677570819854736, 5.845779895782471, 2.402411460876465, 7.7071051597595215, 3.9711380004882812, -6.39003849029541, 6.12687873840332, -3.776029348373413, -11.118121147155762]}} + [2022-05-08 00:18:44,406] [ INFO] - Response time 0.156481 s. + ``` + +* Python API + +``` python +from paddlespeech.server.bin.paddlespeech_client import VectorClientExecutor + +vectorclient_executor = VectorClientExecutor() +res = vectorclient_executor( + input="85236145389.wav", + server_ip="127.0.0.1", + port=8590, + task="spk") +print(res) +``` + +* Output: + + ``` bash + {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'vec': [1.421751856803894, 5.626245498657227, -5.342077255249023, 1.1773887872695923, 3.3080549240112305, 1.7565933465957642, 5.167886257171631, 10.806358337402344, -3.8226819038391113, -5.614140033721924, 2.6238479614257812, -0.8072972893714905, 1.9635076522827148, -7.312870025634766, 0.011035939678549767, -9.723129272460938, 0.6619706153869629, -6.976806163787842, 10.213476181030273, 7.494769096374512, 2.9105682373046875, 3.8949244022369385, 3.799983501434326, 7.106168746948242, 16.90532875061035, -7.149388313293457, 8.733108520507812, 3.423006296157837, -4.831653594970703, -11.403363227844238, 11.232224464416504, 7.127461910247803, -4.282842636108398, 2.452359437942505, -5.130749702453613, -18.17766761779785, -2.6116831302642822, -11.000344276428223, -6.731433391571045, 1.6564682722091675, 0.7618281245231628, 1.125300407409668, -2.0838370323181152, 4.725743293762207, -8.782588005065918, -3.5398752689361572, 3.8142364025115967, 5.142068862915039, 2.1620609760284424, 4.09643030166626, -6.416214942932129, 12.747446060180664, 1.9429892301559448, -15.15294361114502, 6.417416095733643, 16.09701156616211, -9.716667175292969, -1.9920575618743896, -3.36494779586792, -1.8719440698623657, 11.567351341247559, 3.6978814601898193, 11.258262634277344, 7.442368507385254, 9.183408737182617, 4.528149127960205, -1.2417854070663452, 4.395912170410156, 6.6727728843688965, 5.88988733291626, 7.627128601074219, -0.6691966652870178, -11.889698028564453, -9.20886516571045, -7.42740535736084, -3.777663230895996, 6.917238712310791, -9.848755836486816, -2.0944676399230957, -5.1351165771484375, 0.4956451654434204, 9.317537307739258, -5.914181232452393, -1.809860348701477, -0.11738915741443634, -7.1692705154418945, -1.057827353477478, -5.721670627593994, -5.117385387420654, 16.13765525817871, -4.473617076873779, 7.6624321937561035, -0.55381840467453, 9.631585121154785, -6.470459461212158, -8.548508644104004, 4.371616840362549, -0.7970245480537415, 4.4789886474609375, -2.975860834121704, 3.2721822261810303, 2.838287830352783, 5.134591102600098, -9.19079875946045, -0.5657302737236023, -4.8745832443237305, 2.3165574073791504, -5.984319686889648, -2.1798853874206543, 0.3554139733314514, -0.3178512752056122, 9.493552207946777, 2.1144471168518066, 4.358094692230225, -12.089824676513672, 8.451693534851074, -7.925466537475586, 4.624246597290039, 4.428936958312988, 18.69200897216797, -2.6204581260681152, -5.14918851852417, -0.3582090139389038, 8.488558769226074, 4.98148775100708, -9.326835632324219, -2.2544219493865967, 6.641760349273682, 1.2119598388671875, 10.977124214172363, 16.555034637451172, 3.3238420486450195, 9.551861763000488, -1.6676981449127197, -0.7953944206237793, -8.605667114257812, -0.4735655188560486, 2.674196243286133, -5.359177112579346, -2.66738224029541, 0.6660683155059814, 15.44322681427002, 4.740593433380127, -3.472534418106079, 11.592567443847656, -2.0544962882995605, 1.736127495765686, -8.265326499938965, -9.30447769165039, 5.406829833984375, -1.518022894859314, -7.746612548828125, -6.089611053466797, 0.07112743705511093, -0.3490503430366516, -8.64989185333252, -9.998957633972168, -2.564845085144043, -0.5399947762489319, 2.6018123626708984, -0.3192799389362335, -1.8815255165100098, -2.0721492767333984, -3.410574436187744, -8.29980754852295, 1.483638048171997, -15.365986824035645, -8.288211822509766, 3.884779930114746, -3.4876468181610107, 7.362999439239502, 0.4657334089279175, 3.1326050758361816, 12.438895225524902, -1.8337041139602661, 4.532927989959717, 2.7264339923858643, 10.14534854888916, -6.521963596343994, 2.897155523300171, -3.392582654953003, 5.079153060913086, 7.7597246170043945, 4.677570819854736, 5.845779895782471, 2.402411460876465, 7.7071051597595215, 3.9711380004882812, -6.39003849029541, 6.12687873840332, -3.776029348373413, -11.118121147155762]}} + ``` + +#### 7.2 Get the score between speaker audio embedding + +**Note:** The response time will be slightly longer when using the client for the first time + +- Command Line (Recommended) + + ``` bash + paddlespeech_client vector --task score --server_ip 127.0.0.1 --port 8590 --enroll 85236145389.wav --test 123456789.wav + ``` + + * Usage: + + ``` bash + paddlespeech_client vector --help + ``` + + * Arguments: + * server_ip: server ip. Default: 127.0.0.1 + * port: server port. Default: 8090 + * input(required): Input text to generate. + * task: the task of vector, can be use 'spk' or 'score。If get the score, this must be 'score' parameter. + * enroll: enroll audio + * test: test audio + +* Output: + +``` bash + [2022-05-09 10:28:40,556] [ INFO] - vector score http client start + [2022-05-09 10:28:40,556] [ INFO] - enroll audio: 85236145389.wav, test audio: 123456789.wav + [2022-05-09 10:28:40,556] [ INFO] - endpoint: http://127.0.0.1:8590/paddlespeech/vector/score + [2022-05-09 10:28:40,731] [ INFO] - The vector score is: {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'score': 0.4292638897895813}} + [2022-05-09 10:28:40,731] [ INFO] - The vector: None + [2022-05-09 10:28:40,731] [ INFO] - Response time 0.175514 s. +``` + +* Python API + +``` python +from paddlespeech.server.bin.paddlespeech_client import VectorClientExecutor + +vectorclient_executor = VectorClientExecutor() +res = vectorclient_executor( + input=None, + enroll_audio="85236145389.wav", + test_audio="123456789.wav", + server_ip="127.0.0.1", + port=8590, + task="score") +print(res) +``` + +* Output: + +``` bash +[2022-05-09 10:34:54,769] [ INFO] - vector score http client start +[2022-05-09 10:34:54,771] [ INFO] - enroll audio: 85236145389.wav, test audio: 123456789.wav +[2022-05-09 10:34:54,771] [ INFO] - endpoint: http://127.0.0.1:8590/paddlespeech/vector/score +[2022-05-09 10:34:55,026] [ INFO] - The vector score is: {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'score': 0.4292638897895813}} +``` + + +### 8. Punctuation prediction + +**Note:** The response time will be slightly longer when using the client for the first time + +- Command Line (Recommended) + ``` bash + paddlespeech_client text --server_ip 127.0.0.1 --port 8090 --input "我认为跑步最重要的就是给我带来了身体健康" + ``` + + Usage: + + ```bash + paddlespeech_client text --help + ``` + 参数: + - `server_ip`: server ip. Default: 127.0.0.1 + - `port`: server port. Default: 8090 + - `input`(required): Input text to get punctuation. + + Output: + ```bash + [2022-05-09 18:19:04,397] [ INFO] - The punc text: 我认为跑步最重要的就是给我带来了身体健康。 + [2022-05-09 18:19:04,397] [ INFO] - Response time 0.092407 s. + ``` + +- Python API + ```python + from paddlespeech.server.bin.paddlespeech_client import TextClientExecutor + + textclient_executor = TextClientExecutor() + res = textclient_executor( + input="我认为跑步最重要的就是给我带来了身体健康", + server_ip="127.0.0.1", + port=8390,) + print(res) + + ``` + + Output: + ```bash + 我认为跑步最重要的就是给我带来了身体健康。 + ``` + + ## Models supported by the service ### ASR model Get all models supported by the ASR service via `paddlespeech_server stats --task asr`, where static models can be used for paddle inference inference. @@ -245,3 +405,9 @@ Get all models supported by the TTS service via `paddlespeech_server stats --tas ### CLS model Get all models supported by the CLS service via `paddlespeech_server stats --task cls`, where static models can be used for paddle inference inference. + +### Vector model +Get all models supported by the TTS service via `paddlespeech_server stats --task vector`, where static models can be used for paddle inference inference. + +### Text model +Get all models supported by the CLS service via `paddlespeech_server stats --task text`, where static models can be used for paddle inference inference. diff --git a/demos/speech_server/README_cn.md b/demos/speech_server/README_cn.md index 55fc6b34..5f24239d 100644 --- a/demos/speech_server/README_cn.md +++ b/demos/speech_server/README_cn.md @@ -194,6 +194,7 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee ``` ### 6. CLS 客户端使用方法 + **注意:** 初次使用客户端时响应时间会略长 - 命令行 (推荐使用) ``` @@ -240,6 +241,159 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee ``` +### 7. 声纹客户端使用方法 + +#### 7.1 提取声纹特征 +注意: 初次使用客户端时响应时间会略长 +* 命令行 (推荐使用) + +``` bash +paddlespeech_client vector --task spk --server_ip 127.0.0.1 --port 8590 --input 85236145389.wav +``` + +* 使用帮助: + +``` bash +paddlespeech_client vector --help +``` +* 参数: + * server_ip: 服务端ip地址,默认: 127.0.0.1。 + * port: 服务端口,默认: 8090。 + * input(必须输入): 用于识别的音频文件。 + * task: vector 的任务,可选spk或者score。默认是 spk。 + * enroll: 注册音频;。 + * test: 测试音频。 +* 输出: + +``` bash + [2022-05-08 00:18:44,249] [ INFO] - vector http client start + [2022-05-08 00:18:44,250] [ INFO] - the input audio: 85236145389.wav + [2022-05-08 00:18:44,250] [ INFO] - endpoint: http://127.0.0.1:8590/paddlespeech/vector + [2022-05-08 00:18:44,250] [ INFO] - http://127.0.0.1:8590/paddlespeech/vector + [2022-05-08 00:18:44,406] [ INFO] - The vector: {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'vec': [1.421751856803894, 5.626245498657227, -5.342077255249023, 1.1773887872695923, 3.3080549240112305, 1.7565933465957642, 5.167886257171631, 10.806358337402344, -3.8226819038391113, -5.614140033721924, 2.6238479614257812, -0.8072972893714905, 1.9635076522827148, -7.312870025634766, 0.011035939678549767, -9.723129272460938, 0.6619706153869629, -6.976806163787842, 10.213476181030273, 7.494769096374512, 2.9105682373046875, 3.8949244022369385, 3.799983501434326, 7.106168746948242, 16.90532875061035, -7.149388313293457, 8.733108520507812, 3.423006296157837, -4.831653594970703, -11.403363227844238, 11.232224464416504, 7.127461910247803, -4.282842636108398, 2.452359437942505, -5.130749702453613, -18.17766761779785, -2.6116831302642822, -11.000344276428223, -6.731433391571045, 1.6564682722091675, 0.7618281245231628, 1.125300407409668, -2.0838370323181152, 4.725743293762207, -8.782588005065918, -3.5398752689361572, 3.8142364025115967, 5.142068862915039, 2.1620609760284424, 4.09643030166626, -6.416214942932129, 12.747446060180664, 1.9429892301559448, -15.15294361114502, 6.417416095733643, 16.09701156616211, -9.716667175292969, -1.9920575618743896, -3.36494779586792, -1.8719440698623657, 11.567351341247559, 3.6978814601898193, 11.258262634277344, 7.442368507385254, 9.183408737182617, 4.528149127960205, -1.2417854070663452, 4.395912170410156, 6.6727728843688965, 5.88988733291626, 7.627128601074219, -0.6691966652870178, -11.889698028564453, -9.20886516571045, -7.42740535736084, -3.777663230895996, 6.917238712310791, -9.848755836486816, -2.0944676399230957, -5.1351165771484375, 0.4956451654434204, 9.317537307739258, -5.914181232452393, -1.809860348701477, -0.11738915741443634, -7.1692705154418945, -1.057827353477478, -5.721670627593994, -5.117385387420654, 16.13765525817871, -4.473617076873779, 7.6624321937561035, -0.55381840467453, 9.631585121154785, -6.470459461212158, -8.548508644104004, 4.371616840362549, -0.7970245480537415, 4.4789886474609375, -2.975860834121704, 3.2721822261810303, 2.838287830352783, 5.134591102600098, -9.19079875946045, -0.5657302737236023, -4.8745832443237305, 2.3165574073791504, -5.984319686889648, -2.1798853874206543, 0.3554139733314514, -0.3178512752056122, 9.493552207946777, 2.1144471168518066, 4.358094692230225, -12.089824676513672, 8.451693534851074, -7.925466537475586, 4.624246597290039, 4.428936958312988, 18.69200897216797, -2.6204581260681152, -5.14918851852417, -0.3582090139389038, 8.488558769226074, 4.98148775100708, -9.326835632324219, -2.2544219493865967, 6.641760349273682, 1.2119598388671875, 10.977124214172363, 16.555034637451172, 3.3238420486450195, 9.551861763000488, -1.6676981449127197, -0.7953944206237793, -8.605667114257812, -0.4735655188560486, 2.674196243286133, -5.359177112579346, -2.66738224029541, 0.6660683155059814, 15.44322681427002, 4.740593433380127, -3.472534418106079, 11.592567443847656, -2.0544962882995605, 1.736127495765686, -8.265326499938965, -9.30447769165039, 5.406829833984375, -1.518022894859314, -7.746612548828125, -6.089611053466797, 0.07112743705511093, -0.3490503430366516, -8.64989185333252, -9.998957633972168, -2.564845085144043, -0.5399947762489319, 2.6018123626708984, -0.3192799389362335, -1.8815255165100098, -2.0721492767333984, -3.410574436187744, -8.29980754852295, 1.483638048171997, -15.365986824035645, -8.288211822509766, 3.884779930114746, -3.4876468181610107, 7.362999439239502, 0.4657334089279175, 3.1326050758361816, 12.438895225524902, -1.8337041139602661, 4.532927989959717, 2.7264339923858643, 10.14534854888916, -6.521963596343994, 2.897155523300171, -3.392582654953003, 5.079153060913086, 7.7597246170043945, 4.677570819854736, 5.845779895782471, 2.402411460876465, 7.7071051597595215, 3.9711380004882812, -6.39003849029541, 6.12687873840332, -3.776029348373413, -11.118121147155762]}} + [2022-05-08 00:18:44,406] [ INFO] - Response time 0.156481 s. +``` + +* Python API + +``` python +from paddlespeech.server.bin.paddlespeech_client import VectorClientExecutor + +vectorclient_executor = VectorClientExecutor() +res = vectorclient_executor( + input="85236145389.wav", + server_ip="127.0.0.1", + port=8590, + task="spk") +print(res) +``` + +* 输出: + +``` bash + {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'vec': [1.421751856803894, 5.626245498657227, -5.342077255249023, 1.1773887872695923, 3.3080549240112305, 1.7565933465957642, 5.167886257171631, 10.806358337402344, -3.8226819038391113, -5.614140033721924, 2.6238479614257812, -0.8072972893714905, 1.9635076522827148, -7.312870025634766, 0.011035939678549767, -9.723129272460938, 0.6619706153869629, -6.976806163787842, 10.213476181030273, 7.494769096374512, 2.9105682373046875, 3.8949244022369385, 3.799983501434326, 7.106168746948242, 16.90532875061035, -7.149388313293457, 8.733108520507812, 3.423006296157837, -4.831653594970703, -11.403363227844238, 11.232224464416504, 7.127461910247803, -4.282842636108398, 2.452359437942505, -5.130749702453613, -18.17766761779785, -2.6116831302642822, -11.000344276428223, -6.731433391571045, 1.6564682722091675, 0.7618281245231628, 1.125300407409668, -2.0838370323181152, 4.725743293762207, -8.782588005065918, -3.5398752689361572, 3.8142364025115967, 5.142068862915039, 2.1620609760284424, 4.09643030166626, -6.416214942932129, 12.747446060180664, 1.9429892301559448, -15.15294361114502, 6.417416095733643, 16.09701156616211, -9.716667175292969, -1.9920575618743896, -3.36494779586792, -1.8719440698623657, 11.567351341247559, 3.6978814601898193, 11.258262634277344, 7.442368507385254, 9.183408737182617, 4.528149127960205, -1.2417854070663452, 4.395912170410156, 6.6727728843688965, 5.88988733291626, 7.627128601074219, -0.6691966652870178, -11.889698028564453, -9.20886516571045, -7.42740535736084, -3.777663230895996, 6.917238712310791, -9.848755836486816, -2.0944676399230957, -5.1351165771484375, 0.4956451654434204, 9.317537307739258, -5.914181232452393, -1.809860348701477, -0.11738915741443634, -7.1692705154418945, -1.057827353477478, -5.721670627593994, -5.117385387420654, 16.13765525817871, -4.473617076873779, 7.6624321937561035, -0.55381840467453, 9.631585121154785, -6.470459461212158, -8.548508644104004, 4.371616840362549, -0.7970245480537415, 4.4789886474609375, -2.975860834121704, 3.2721822261810303, 2.838287830352783, 5.134591102600098, -9.19079875946045, -0.5657302737236023, -4.8745832443237305, 2.3165574073791504, -5.984319686889648, -2.1798853874206543, 0.3554139733314514, -0.3178512752056122, 9.493552207946777, 2.1144471168518066, 4.358094692230225, -12.089824676513672, 8.451693534851074, -7.925466537475586, 4.624246597290039, 4.428936958312988, 18.69200897216797, -2.6204581260681152, -5.14918851852417, -0.3582090139389038, 8.488558769226074, 4.98148775100708, -9.326835632324219, -2.2544219493865967, 6.641760349273682, 1.2119598388671875, 10.977124214172363, 16.555034637451172, 3.3238420486450195, 9.551861763000488, -1.6676981449127197, -0.7953944206237793, -8.605667114257812, -0.4735655188560486, 2.674196243286133, -5.359177112579346, -2.66738224029541, 0.6660683155059814, 15.44322681427002, 4.740593433380127, -3.472534418106079, 11.592567443847656, -2.0544962882995605, 1.736127495765686, -8.265326499938965, -9.30447769165039, 5.406829833984375, -1.518022894859314, -7.746612548828125, -6.089611053466797, 0.07112743705511093, -0.3490503430366516, -8.64989185333252, -9.998957633972168, -2.564845085144043, -0.5399947762489319, 2.6018123626708984, -0.3192799389362335, -1.8815255165100098, -2.0721492767333984, -3.410574436187744, -8.29980754852295, 1.483638048171997, -15.365986824035645, -8.288211822509766, 3.884779930114746, -3.4876468181610107, 7.362999439239502, 0.4657334089279175, 3.1326050758361816, 12.438895225524902, -1.8337041139602661, 4.532927989959717, 2.7264339923858643, 10.14534854888916, -6.521963596343994, 2.897155523300171, -3.392582654953003, 5.079153060913086, 7.7597246170043945, 4.677570819854736, 5.845779895782471, 2.402411460876465, 7.7071051597595215, 3.9711380004882812, -6.39003849029541, 6.12687873840332, -3.776029348373413, -11.118121147155762]}} +``` + +#### 7.2 音频声纹打分 + +注意: 初次使用客户端时响应时间会略长 +* 命令行 (推荐使用) + +``` bash +paddlespeech_client vector --task score --server_ip 127.0.0.1 --port 8590 --enroll 85236145389.wav --test 123456789.wav +``` + +* 使用帮助: + +``` bash +paddlespeech_client vector --help +``` + +* 参数: + * server_ip: 服务端ip地址,默认: 127.0.0.1。 + * port: 服务端口,默认: 8090。 + * input(必须输入): 用于识别的音频文件。 + * task: vector 的任务,可选spk或者score。默认是 spk。 + * enroll: 注册音频;。 + * test: 测试音频。 +* 输出: + +``` bash + [2022-05-09 10:28:40,556] [ INFO] - vector score http client start + [2022-05-09 10:28:40,556] [ INFO] - enroll audio: 85236145389.wav, test audio: 123456789.wav + [2022-05-09 10:28:40,556] [ INFO] - endpoint: http://127.0.0.1:8590/paddlespeech/vector/score + [2022-05-09 10:28:40,731] [ INFO] - The vector score is: {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'score': 0.4292638897895813}} + [2022-05-09 10:28:40,731] [ INFO] - The vector: None + [2022-05-09 10:28:40,731] [ INFO] - Response time 0.175514 s. +``` + +* Python API + +``` python +from paddlespeech.server.bin.paddlespeech_client import VectorClientExecutor + +vectorclient_executor = VectorClientExecutor() +res = vectorclient_executor( + input=None, + enroll_audio="85236145389.wav", + test_audio="123456789.wav", + server_ip="127.0.0.1", + port=8590, + task="score") +print(res) +``` + +* 输出: + +``` bash +[2022-05-09 10:34:54,769] [ INFO] - vector score http client start +[2022-05-09 10:34:54,771] [ INFO] - enroll audio: 85236145389.wav, test audio: 123456789.wav +[2022-05-09 10:34:54,771] [ INFO] - endpoint: http://127.0.0.1:8590/paddlespeech/vector/score +[2022-05-09 10:34:55,026] [ INFO] - The vector score is: {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'score': 0.4292638897895813}} +``` + + +### 8. 标点预测 + + **注意:** 初次使用客户端时响应时间会略长 + - 命令行 (推荐使用) + ``` bash + paddlespeech_client text --server_ip 127.0.0.1 --port 8090 --input "我认为跑步最重要的就是给我带来了身体健康" + ``` + + 使用帮助: + + ```bash + paddlespeech_client text --help + ``` + 参数: + - `server_ip`: 服务端ip地址,默认: 127.0.0.1。 + - `port`: 服务端口,默认: 8090。 + - `input`(必须输入): 用于标点预测的文本内容。 + + 输出: + ```bash + [2022-05-09 18:19:04,397] [ INFO] - The punc text: 我认为跑步最重要的就是给我带来了身体健康。 + [2022-05-09 18:19:04,397] [ INFO] - Response time 0.092407 s. + ``` + +- Python API + ```python + from paddlespeech.server.bin.paddlespeech_client import TextClientExecutor + + textclient_executor = TextClientExecutor() + res = textclient_executor( + input="我认为跑步最重要的就是给我带来了身体健康", + server_ip="127.0.0.1", + port=8390,) + print(res) + + ``` + + 输出: + ```bash + 我认为跑步最重要的就是给我带来了身体健康。 + ``` ## 服务支持的模型 ### ASR支持的模型 @@ -250,3 +404,9 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee ### CLS支持的模型 通过 `paddlespeech_server stats --task cls` 获取CLS服务支持的所有模型,其中静态模型可用于 paddle inference 推理。 + +### Vector支持的模型 +通过 `paddlespeech_server stats --task vector` 获取Vector服务支持的所有模型。 + +### Text支持的模型 +通过 `paddlespeech_server stats --task text` 获取Text服务支持的所有模型。 \ No newline at end of file diff --git a/demos/speech_server/conf/application.yaml b/demos/speech_server/conf/application.yaml index 762f4af6..14a9195a 100644 --- a/demos/speech_server/conf/application.yaml +++ b/demos/speech_server/conf/application.yaml @@ -9,7 +9,7 @@ port: 8090 # The task format in the engin_list is: _ # task choices = ['asr_python', 'asr_inference', 'tts_python', 'tts_inference', 'cls_python', 'cls_inference'] protocol: 'http' -engine_list: ['asr_python', 'tts_python', 'cls_python'] +engine_list: ['asr_python', 'tts_python', 'cls_python', 'text_python', 'vector_python'] ################################################################################# @@ -135,3 +135,26 @@ cls_inference: glog_info: False # True -> print glog summary: True # False -> do not show predictor config + +################################### Text ######################################### +################### text task: punc; engine_type: python ####################### +text_python: + task: punc + model_type: 'ernie_linear_p3_wudao' + lang: 'zh' + sample_rate: 16000 + cfg_path: # [optional] + ckpt_path: # [optional] + vocab_file: # [optional] + device: # set 'gpu:id' or 'cpu' + + +################################### Vector ###################################### +################### Vector task: spk; engine_type: python ####################### +vector_python: + task: spk + model_type: 'ecapatdnn_voxceleb12' + sample_rate: 16000 + cfg_path: # [optional] + ckpt_path: # [optional] + device: # set 'gpu:id' or 'cpu' \ No newline at end of file diff --git a/demos/streaming_asr_server/conf/ws_conformer_wenetspeech_application.yaml b/demos/streaming_asr_server/conf/ws_conformer_wenetspeech_application.yaml new file mode 100644 index 00000000..e9a89c19 --- /dev/null +++ b/demos/streaming_asr_server/conf/ws_conformer_wenetspeech_application.yaml @@ -0,0 +1,46 @@ +# This is the parameter configuration file for PaddleSpeech Serving. + +################################################################################# +# SERVER SETTING # +################################################################################# +host: 0.0.0.0 +port: 8090 + +# The task format in the engin_list is: _ +# task choices = ['asr_online'] +# protocol = ['websocket'] (only one can be selected). +# websocket only support online engine type. +protocol: 'websocket' +engine_list: ['asr_online'] + + +################################################################################# +# ENGINE CONFIG # +################################################################################# + +################################### ASR ######################################### +################### speech task: asr; engine_type: online ####################### +asr_online: + model_type: 'conformer_online_wenetspeech' + am_model: # the pdmodel file of am static model [optional] + am_params: # the pdiparams file of am static model [optional] + lang: 'zh' + sample_rate: 16000 + cfg_path: + decode_method: + force_yes: True + device: 'cpu' # cpu or gpu:id + decode_method: "attention_rescoring" + am_predictor_conf: + device: # set 'gpu:id' or 'cpu' + switch_ir_optim: True + glog_info: False # True -> print glog + summary: True # False -> do not show predictor config + + chunk_buffer_conf: + window_n: 7 # frame + shift_n: 4 # frame + window_ms: 25 # ms + shift_ms: 10 # ms + sample_rate: 16000 + sample_width: 2 diff --git a/paddlespeech/server/README.md b/paddlespeech/server/README.md index f3dc9224..34b7fc2a 100644 --- a/paddlespeech/server/README.md +++ b/paddlespeech/server/README.md @@ -63,3 +63,24 @@ paddlespeech_server start --config_file conf/tts_online_application.yaml ``` paddlespeech_client tts_online --server_ip 127.0.0.1 --port 8092 --input "您好,欢迎使用百度飞桨深度学习框架!" --output output.wav ``` + + +## Speaker Verification + +### Lanuch speaker verification server + +``` +paddlespeech_server start --config_file conf/vector_application.yaml +``` + +### Extract speaker embedding from aduio + +``` +paddlespeech_client vector --task spk --server_ip 127.0.0.1 --port 8090 --input 85236145389.wav +``` + +### Get score with speaker audio embedding + +``` +paddlespeech_client vector --task score --server_ip 127.0.0.1 --port 8090 --enroll 123456789.wav --test 85236145389.wav +``` diff --git a/paddlespeech/server/engine/asr/online/pretrained_models.py b/paddlespeech/server/engine/asr/online/pretrained_models.py index 005977b4..ff377865 100644 --- a/paddlespeech/server/engine/asr/online/pretrained_models.py +++ b/paddlespeech/server/engine/asr/online/pretrained_models.py @@ -49,4 +49,22 @@ pretrained_models = { 'lm_md5': '29e02312deb2e59b3c8686c7966d4fe3' }, + "conformer_online_wenetspeech-zh-16k": { + 'url': + 'https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr1/asr1_chunk_conformer_wenetspeech_ckpt_1.0.0a.model.tar.gz', + 'md5': + 'b8c02632b04da34aca88459835be54a6', + 'cfg_path': + 'model.yaml', + 'ckpt_path': + 'exp/chunk_conformer/checkpoints/avg_10', + 'model': + 'exp/chunk_conformer/checkpoints/avg_10.pdparams', + 'params': + 'exp/chunk_conformer/checkpoints/avg_10.pdparams', + 'lm_url': + '', + 'lm_md5': + '', + }, }