From 7afbdbefadd6b249e0560d21afa47c7a33a9ab6f Mon Sep 17 00:00:00 2001 From: xiongxinlei Date: Tue, 24 May 2022 21:29:36 +0800 Subject: [PATCH 1/5] update the vector model, test=doc --- demos/audio_content_search/requirements.txt | 1 + .../streaming_asr_server.py | 38 +++++ demos/speaker_verification/README.md | 159 +++++++++--------- demos/speaker_verification/README_cn.md | 158 ++++++++--------- docs/source/released_model.md | 2 +- examples/voxceleb/sv0/README.md | 6 +- examples/voxceleb/sv0/RESULT.md | 1 + examples/voxceleb/sv0/conf/ecapa_tdnn.yaml | 8 + .../voxceleb/sv0/conf/ecapa_tdnn_small.yaml | 7 + paddlespeech/cli/vector/pretrained_models.py | 4 +- .../server/engine/acs/python/acs_engine.py | 56 ++++-- .../server/tests/vector/http_client.py | 59 +++++++ 12 files changed, 324 insertions(+), 175 deletions(-) create mode 100644 demos/audio_content_search/requirements.txt create mode 100644 demos/audio_content_search/streaming_asr_server.py create mode 100644 paddlespeech/server/tests/vector/http_client.py diff --git a/demos/audio_content_search/requirements.txt b/demos/audio_content_search/requirements.txt new file mode 100644 index 000000000..4126a4868 --- /dev/null +++ b/demos/audio_content_search/requirements.txt @@ -0,0 +1 @@ +websocket-client \ No newline at end of file diff --git a/demos/audio_content_search/streaming_asr_server.py b/demos/audio_content_search/streaming_asr_server.py new file mode 100644 index 000000000..011b009aa --- /dev/null +++ b/demos/audio_content_search/streaming_asr_server.py @@ -0,0 +1,38 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse + +from paddlespeech.cli.log import logger +from paddlespeech.server.bin.paddlespeech_server import ServerExecutor +if __name__ == "__main__": + parser = argparse.ArgumentParser( + prog='paddlespeech_server.start', add_help=True) + parser.add_argument( + "--config_file", + action="store", + help="yaml file of the app", + default=None, + required=True) + + parser.add_argument( + "--log_file", + action="store", + help="log file", + default="./log/paddlespeech.log") + logger.info("start to parse the args") + args = parser.parse_args() + + logger.info("start to launch the streaming asr server") + streaming_asr_server = ServerExecutor() + streaming_asr_server(config_file=args.config_file, log_file=args.log_file) diff --git a/demos/speaker_verification/README.md b/demos/speaker_verification/README.md index b6a1d9bcc..a7d0f819d 100644 --- a/demos/speaker_verification/README.md +++ b/demos/speaker_verification/README.md @@ -53,50 +53,49 @@ wget -c https://paddlespeech.bj.bcebos.com/vector/audio/85236145389.wav Output: ```bash - demo [ 1.4217498 5.626253 -5.342073 1.1773866 3.308055 - 1.756596 5.167894 10.80636 -3.8226728 -5.6141334 - 2.623845 -0.8072968 1.9635103 -7.3128724 0.01103897 - -9.723131 0.6619743 -6.976803 10.213478 7.494748 - 2.9105635 3.8949256 3.7999806 7.1061673 16.905321 - -7.1493764 8.733103 3.4230042 -4.831653 -11.403367 - 11.232214 7.1274667 -4.2828417 2.452362 -5.130748 - -18.177666 -2.6116815 -11.000337 -6.7314315 1.6564683 - 0.7618269 1.1253023 -2.083836 4.725744 -8.782597 - -3.539873 3.814236 5.1420674 2.162061 4.096431 - 
-6.4162116 12.747448 1.9429878 -15.152943 6.417416 - 16.097002 -9.716668 -1.9920526 -3.3649497 -1.871939 - 11.567354 3.69788 11.258265 7.442363 9.183411 - 4.5281515 -1.2417862 4.3959084 6.6727695 5.8898783 - 7.627124 -0.66919386 -11.889693 -9.208865 -7.4274073 - -3.7776625 6.917234 -9.848748 -2.0944717 -5.135116 - 0.49563864 9.317534 -5.9141874 -1.8098574 -0.11738578 - -7.169265 -1.0578263 -5.7216787 -5.1173844 16.137651 - -4.473626 7.6624317 -0.55381083 9.631587 -6.4704556 - -8.548508 4.3716145 -0.79702514 4.478997 -2.9758704 - 3.272176 2.8382776 5.134597 -9.190781 -0.5657382 - -4.8745747 2.3165567 -5.984303 -2.1798875 0.35541576 - -0.31784213 9.493548 2.1144536 4.358092 -12.089823 - 8.451689 -7.925461 4.6242585 4.4289427 18.692003 - -2.6204622 -5.149185 -0.35821092 8.488551 4.981496 - -9.32683 -2.2544234 6.6417594 1.2119585 10.977129 - 16.555033 3.3238444 9.551863 -1.6676947 -0.79539716 - -8.605674 -0.47356385 2.6741948 -5.359179 -2.6673796 - 0.66607 15.443222 4.740594 -3.4725387 11.592567 - -2.054497 1.7361217 -8.265324 -9.30447 5.4068313 - -1.5180256 -7.746615 -6.089606 0.07112726 -0.34904733 - -8.649895 -9.998958 -2.564841 -0.53999114 2.601808 - -0.31927416 -1.8815292 -2.07215 -3.4105783 -8.2998085 - 1.483641 -15.365992 -8.288208 3.8847756 -3.4876456 - 7.3629923 0.4657332 3.132599 12.438889 -1.8337058 - 4.532936 2.7264361 10.145339 -6.521951 2.897153 - -3.3925855 5.079156 7.759716 4.677565 5.8457737 - 2.402413 7.7071047 3.9711342 -6.390043 6.1268735 - -3.7760346 -11.118123 ] + demo [ -1.3251206 7.8606825 -4.620626 0.3000721 2.2648535 + -1.1931441 3.0647137 7.673595 -6.0044727 -12.02426 + -1.9496069 3.1269536 1.618838 -7.6383104 -1.2299773 + -12.338331 2.1373026 -5.3957124 9.717328 5.6752305 + 3.7805123 3.0597172 3.429692 8.97601 13.174125 + -0.53132284 8.9424715 4.46511 -4.4262476 -9.726503 + 8.399328 7.2239175 -7.435854 2.9441683 -4.3430395 + -13.886965 -1.6346735 -10.9027405 -5.311245 3.8007221 + 3.8976038 -2.1230774 -2.3521194 4.151031 -7.4048667 + 
0.13911647 2.4626107 4.9664545 0.9897574 5.4839754 + -3.3574002 10.1340065 -0.6120171 -10.403095 4.6007543 + 16.00935 -7.7836914 -4.1945305 -6.9368606 1.1789556 + 11.490801 4.2380238 9.550931 8.375046 7.5089145 + -0.65707296 -0.30051577 2.8406055 3.0828028 0.730817 + 6.148354 0.13766119 -13.424735 -7.7461405 -2.3227983 + -8.305252 2.9879124 -10.995229 0.15211068 -2.3820348 + -1.7984174 8.495629 -5.8522367 -3.755498 0.6989711 + -5.2702994 -2.6188622 -1.8828466 -4.64665 14.078544 + -0.5495333 10.579158 -3.2160501 9.349004 -4.381078 + -11.675817 -2.8630207 4.5721755 2.246612 -4.574342 + 1.8610188 2.3767874 5.6257877 -9.784078 0.64967257 + -1.4579505 0.4263264 -4.9211264 -2.454784 3.4869802 + -0.42654222 8.341269 1.356552 7.0966883 -13.102829 + 8.016734 -7.1159344 1.8699781 0.208721 14.699384 + -1.025278 -2.6107233 -2.5082312 8.427193 6.9138527 + -6.2912464 0.6157366 2.489688 -3.4668267 9.921763 + 11.200815 -0.1966403 7.4916005 -0.62312716 -0.25848144 + -9.947997 -0.9611041 1.1649219 -2.1907122 -1.5028487 + -0.51926106 15.165954 2.4649463 -0.9980445 7.4416637 + -2.0768049 3.5896823 -7.3055434 -7.5620847 4.323335 + 0.0804418 -6.56401 -2.3148053 -1.7642345 -2.4708817 + -7.675618 -9.548878 -1.0177554 0.16986446 2.5877135 + -1.8752296 -0.36614323 -6.0493784 -2.3965611 -5.9453387 + 0.9424033 -13.155974 -7.457801 0.14658108 -3.742797 + 5.8414927 -1.2872906 5.5694313 12.57059 1.0939219 + 2.2142086 1.9181576 6.9914207 -5.888139 3.1409824 + -2.003628 2.4434285 9.973139 5.03668 2.0051203 + 2.8615603 5.860224 2.9176188 -1.6311141 2.0292206 + -4.070415 -6.831437 ] ``` - Python API ```python - import paddle from paddlespeech.cli import VectorExecutor vector_executor = VectorExecutor() @@ -169,47 +168,47 @@ wget -c https://paddlespeech.bj.bcebos.com/vector/audio/85236145389.wav -3.7760346 -11.118123 ] # get the test embedding Test embedding Result: - [ -1.902964 2.0690894 -8.034194 3.5472693 0.18089125 - 6.9085927 1.4097427 -1.9487704 -10.021278 -0.20755845 - -8.04332 4.344489 
2.3200977 -14.306299 5.184692 - -11.55602 -3.8497238 0.6444722 1.2833948 2.6766639 - 0.5878921 0.7946299 1.7207596 2.5791872 14.998469 - -1.3385371 15.031221 -0.8006958 1.99287 -9.52007 - 2.435466 4.003221 -4.33817 -4.898601 -5.304714 - -18.033886 10.790787 -12.784645 -5.641755 2.9761686 - -10.566622 1.4839455 6.152458 -5.7195854 2.8603241 - 6.112133 8.489869 5.5958056 1.2836679 -1.2293907 - 0.89927405 7.0288725 -2.854029 -0.9782962 5.8255906 - 14.905906 -5.025907 0.7866458 -4.2444224 -16.354029 - 10.521315 0.9604709 -3.3257897 7.144871 -13.592733 - -8.568869 -1.7953678 0.26313916 10.916714 -6.9374123 - 1.857403 -6.2746415 2.8154466 -7.2338667 -2.293357 - -0.05452765 5.4287076 5.0849075 -6.690375 -1.6183422 - 3.654291 0.94352573 -9.200294 -5.4749465 -3.5235846 - 1.3420814 4.240421 -2.772944 -2.8451524 16.311104 - 4.2969875 -1.762936 -12.5758915 8.595198 -0.8835239 - -1.5708797 1.568961 1.1413603 3.5032008 -0.45251232 - -6.786333 16.89443 5.3366146 -8.789056 0.6355629 - 3.2579517 -3.328322 7.5969577 0.66025066 -6.550468 - -9.148656 2.020372 -0.4615173 1.1965656 -3.8764873 - 11.6562195 -6.0750933 12.182899 3.2218833 0.81969476 - 5.570001 -3.8459578 -7.205299 7.9262037 -7.6611166 - -5.249467 -2.2671914 7.2658715 -13.298164 4.821147 - -2.7263982 11.691089 -3.8918593 -2.838112 -1.0336838 - -3.8034165 2.8536487 -5.60398 -1.1972581 1.3455094 - -3.4903061 2.2408795 5.5010734 -3.970756 11.99696 - -7.8858757 0.43160373 -5.5059714 4.3426995 16.322706 - 11.635366 0.72157705 -9.245714 -3.91465 -4.449838 - -1.5716927 7.713747 -2.2430465 -6.198303 -13.481864 - 2.8156567 -5.7812386 5.1456156 2.7289324 -14.505571 - 13.270688 3.448231 -7.0659585 4.5886116 -4.466099 - -0.296428 -11.463529 -2.6076477 14.110243 -6.9725137 - -1.9962958 2.7119343 19.391657 0.01961198 14.607133 - -1.6695905 -4.391516 1.3131028 -6.670972 -5.888604 - 12.0612335 5.9285784 3.3715196 1.492534 10.723728 - -0.95514804 -12.085431 ] + [ 2.5247195 5.119042 -4.335273 4.4583654 5.047907 + 3.5059214 1.6159848 
0.49364898 -11.6899185 -3.1014526 + -5.6589785 -0.42684984 2.674276 -11.937654 6.2248464 + -10.776924 -5.694543 1.112041 1.5709964 1.0961034 + 1.3976512 2.324352 1.339981 5.279319 13.734659 + -2.5753925 13.651442 -2.2357535 5.1575427 -3.251567 + 1.4023279 6.1191974 -6.0845175 -1.3646189 -2.6789894 + -15.220778 9.779349 -9.411551 -6.388947 6.8313975 + -9.245996 0.31196198 2.5509644 -4.413065 6.1649427 + 6.793837 2.6328635 8.620976 3.4832475 0.52491665 + 2.9115407 5.8392377 0.6702376 -3.2726715 2.6694255 + 16.91701 -5.5811176 0.23362345 -4.5573606 -11.801059 + 14.728292 -0.5198082 -3.999922 7.0927105 -7.0459595 + -5.4389 -0.46420583 -5.1085467 10.376568 -8.889225 + -0.37705845 -1.659806 2.6731026 -7.1909504 1.4608804 + -2.163136 -0.17949677 4.0241547 0.11319201 0.601279 + 2.039692 3.1910992 -11.649526 -8.121584 -4.8707457 + 0.3851982 1.4231744 -2.3321972 0.99332285 14.121717 + 5.899413 0.7384519 -17.760096 10.555021 4.1366534 + -0.3391071 -0.20792882 3.208204 0.8847948 -8.721497 + -6.432868 13.006379 4.8956 -9.155822 -1.9441519 + 5.7815638 -2.066733 10.425042 -0.8802383 -2.4314315 + -9.869258 0.35095334 -5.3549943 2.1076174 -8.290468 + 8.4433365 -4.689333 9.334139 -2.172678 -3.0250976 + 8.394216 -3.2110903 -7.93868 2.3960824 -2.3213403 + -1.4963245 -3.476059 4.132903 -10.893354 4.362673 + -0.45456508 10.258634 -1.1655927 -6.7799754 0.22885278 + -4.399287 2.333433 -4.84745 -4.2752337 -1.3577863 + -1.0685898 9.505196 7.3062205 0.08708266 12.927811 + -9.57974 1.3936648 -1.9444873 5.776769 15.251903 + 10.6118355 -1.4903594 -9.535318 -3.6553776 -1.6699586 + -0.5933151 7.600357 -4.8815503 -8.698617 -15.855757 + 0.25632986 -7.2235737 0.9506656 0.7128582 -9.051738 + 8.74869 -1.6426028 -6.5762258 2.506905 -6.7431564 + 5.129912 -12.189555 -3.6435068 12.068113 -6.0059533 + -2.3535995 2.9014351 22.3082 -1.5563312 13.193291 + 2.7583609 -7.468798 1.3407065 -4.599617 -6.2345777 + 10.7689295 7.137627 5.099476 0.3473359 9.647881 + -2.0484571 -5.8549366 ] # get the score between 
enroll and test - Eembeddings Score: 0.4292638301849365 + Eembeddings Score: 0.45332613587379456 ``` ### 4.Pretrained Models diff --git a/demos/speaker_verification/README_cn.md b/demos/speaker_verification/README_cn.md index 90bba38ac..04e1aeecd 100644 --- a/demos/speaker_verification/README_cn.md +++ b/demos/speaker_verification/README_cn.md @@ -51,45 +51,45 @@ wget -c https://paddlespeech.bj.bcebos.com/vector/audio/85236145389.wav 输出: ```bash - demo [ 1.4217498 5.626253 -5.342073 1.1773866 3.308055 - 1.756596 5.167894 10.80636 -3.8226728 -5.6141334 - 2.623845 -0.8072968 1.9635103 -7.3128724 0.01103897 - -9.723131 0.6619743 -6.976803 10.213478 7.494748 - 2.9105635 3.8949256 3.7999806 7.1061673 16.905321 - -7.1493764 8.733103 3.4230042 -4.831653 -11.403367 - 11.232214 7.1274667 -4.2828417 2.452362 -5.130748 - -18.177666 -2.6116815 -11.000337 -6.7314315 1.6564683 - 0.7618269 1.1253023 -2.083836 4.725744 -8.782597 - -3.539873 3.814236 5.1420674 2.162061 4.096431 - -6.4162116 12.747448 1.9429878 -15.152943 6.417416 - 16.097002 -9.716668 -1.9920526 -3.3649497 -1.871939 - 11.567354 3.69788 11.258265 7.442363 9.183411 - 4.5281515 -1.2417862 4.3959084 6.6727695 5.8898783 - 7.627124 -0.66919386 -11.889693 -9.208865 -7.4274073 - -3.7776625 6.917234 -9.848748 -2.0944717 -5.135116 - 0.49563864 9.317534 -5.9141874 -1.8098574 -0.11738578 - -7.169265 -1.0578263 -5.7216787 -5.1173844 16.137651 - -4.473626 7.6624317 -0.55381083 9.631587 -6.4704556 - -8.548508 4.3716145 -0.79702514 4.478997 -2.9758704 - 3.272176 2.8382776 5.134597 -9.190781 -0.5657382 - -4.8745747 2.3165567 -5.984303 -2.1798875 0.35541576 - -0.31784213 9.493548 2.1144536 4.358092 -12.089823 - 8.451689 -7.925461 4.6242585 4.4289427 18.692003 - -2.6204622 -5.149185 -0.35821092 8.488551 4.981496 - -9.32683 -2.2544234 6.6417594 1.2119585 10.977129 - 16.555033 3.3238444 9.551863 -1.6676947 -0.79539716 - -8.605674 -0.47356385 2.6741948 -5.359179 -2.6673796 - 0.66607 15.443222 4.740594 -3.4725387 11.592567 - -2.054497 
1.7361217 -8.265324 -9.30447 5.4068313 - -1.5180256 -7.746615 -6.089606 0.07112726 -0.34904733 - -8.649895 -9.998958 -2.564841 -0.53999114 2.601808 - -0.31927416 -1.8815292 -2.07215 -3.4105783 -8.2998085 - 1.483641 -15.365992 -8.288208 3.8847756 -3.4876456 - 7.3629923 0.4657332 3.132599 12.438889 -1.8337058 - 4.532936 2.7264361 10.145339 -6.521951 2.897153 - -3.3925855 5.079156 7.759716 4.677565 5.8457737 - 2.402413 7.7071047 3.9711342 -6.390043 6.1268735 - -3.7760346 -11.118123 ] + [ -1.3251206 7.8606825 -4.620626 0.3000721 2.2648535 + -1.1931441 3.0647137 7.673595 -6.0044727 -12.02426 + -1.9496069 3.1269536 1.618838 -7.6383104 -1.2299773 + -12.338331 2.1373026 -5.3957124 9.717328 5.6752305 + 3.7805123 3.0597172 3.429692 8.97601 13.174125 + -0.53132284 8.9424715 4.46511 -4.4262476 -9.726503 + 8.399328 7.2239175 -7.435854 2.9441683 -4.3430395 + -13.886965 -1.6346735 -10.9027405 -5.311245 3.8007221 + 3.8976038 -2.1230774 -2.3521194 4.151031 -7.4048667 + 0.13911647 2.4626107 4.9664545 0.9897574 5.4839754 + -3.3574002 10.1340065 -0.6120171 -10.403095 4.6007543 + 16.00935 -7.7836914 -4.1945305 -6.9368606 1.1789556 + 11.490801 4.2380238 9.550931 8.375046 7.5089145 + -0.65707296 -0.30051577 2.8406055 3.0828028 0.730817 + 6.148354 0.13766119 -13.424735 -7.7461405 -2.3227983 + -8.305252 2.9879124 -10.995229 0.15211068 -2.3820348 + -1.7984174 8.495629 -5.8522367 -3.755498 0.6989711 + -5.2702994 -2.6188622 -1.8828466 -4.64665 14.078544 + -0.5495333 10.579158 -3.2160501 9.349004 -4.381078 + -11.675817 -2.8630207 4.5721755 2.246612 -4.574342 + 1.8610188 2.3767874 5.6257877 -9.784078 0.64967257 + -1.4579505 0.4263264 -4.9211264 -2.454784 3.4869802 + -0.42654222 8.341269 1.356552 7.0966883 -13.102829 + 8.016734 -7.1159344 1.8699781 0.208721 14.699384 + -1.025278 -2.6107233 -2.5082312 8.427193 6.9138527 + -6.2912464 0.6157366 2.489688 -3.4668267 9.921763 + 11.200815 -0.1966403 7.4916005 -0.62312716 -0.25848144 + -9.947997 -0.9611041 1.1649219 -2.1907122 -1.5028487 + -0.51926106 
15.165954 2.4649463 -0.9980445 7.4416637 + -2.0768049 3.5896823 -7.3055434 -7.5620847 4.323335 + 0.0804418 -6.56401 -2.3148053 -1.7642345 -2.4708817 + -7.675618 -9.548878 -1.0177554 0.16986446 2.5877135 + -1.8752296 -0.36614323 -6.0493784 -2.3965611 -5.9453387 + 0.9424033 -13.155974 -7.457801 0.14658108 -3.742797 + 5.8414927 -1.2872906 5.5694313 12.57059 1.0939219 + 2.2142086 1.9181576 6.9914207 -5.888139 3.1409824 + -2.003628 2.4434285 9.973139 5.03668 2.0051203 + 2.8615603 5.860224 2.9176188 -1.6311141 2.0292206 + -4.070415 -6.831437 ] ``` - Python API @@ -166,47 +166,47 @@ wget -c https://paddlespeech.bj.bcebos.com/vector/audio/85236145389.wav -3.7760346 -11.118123 ] # get the test embedding Test embedding Result: - [ -1.902964 2.0690894 -8.034194 3.5472693 0.18089125 - 6.9085927 1.4097427 -1.9487704 -10.021278 -0.20755845 - -8.04332 4.344489 2.3200977 -14.306299 5.184692 - -11.55602 -3.8497238 0.6444722 1.2833948 2.6766639 - 0.5878921 0.7946299 1.7207596 2.5791872 14.998469 - -1.3385371 15.031221 -0.8006958 1.99287 -9.52007 - 2.435466 4.003221 -4.33817 -4.898601 -5.304714 - -18.033886 10.790787 -12.784645 -5.641755 2.9761686 - -10.566622 1.4839455 6.152458 -5.7195854 2.8603241 - 6.112133 8.489869 5.5958056 1.2836679 -1.2293907 - 0.89927405 7.0288725 -2.854029 -0.9782962 5.8255906 - 14.905906 -5.025907 0.7866458 -4.2444224 -16.354029 - 10.521315 0.9604709 -3.3257897 7.144871 -13.592733 - -8.568869 -1.7953678 0.26313916 10.916714 -6.9374123 - 1.857403 -6.2746415 2.8154466 -7.2338667 -2.293357 - -0.05452765 5.4287076 5.0849075 -6.690375 -1.6183422 - 3.654291 0.94352573 -9.200294 -5.4749465 -3.5235846 - 1.3420814 4.240421 -2.772944 -2.8451524 16.311104 - 4.2969875 -1.762936 -12.5758915 8.595198 -0.8835239 - -1.5708797 1.568961 1.1413603 3.5032008 -0.45251232 - -6.786333 16.89443 5.3366146 -8.789056 0.6355629 - 3.2579517 -3.328322 7.5969577 0.66025066 -6.550468 - -9.148656 2.020372 -0.4615173 1.1965656 -3.8764873 - 11.6562195 -6.0750933 12.182899 3.2218833 
0.81969476 - 5.570001 -3.8459578 -7.205299 7.9262037 -7.6611166 - -5.249467 -2.2671914 7.2658715 -13.298164 4.821147 - -2.7263982 11.691089 -3.8918593 -2.838112 -1.0336838 - -3.8034165 2.8536487 -5.60398 -1.1972581 1.3455094 - -3.4903061 2.2408795 5.5010734 -3.970756 11.99696 - -7.8858757 0.43160373 -5.5059714 4.3426995 16.322706 - 11.635366 0.72157705 -9.245714 -3.91465 -4.449838 - -1.5716927 7.713747 -2.2430465 -6.198303 -13.481864 - 2.8156567 -5.7812386 5.1456156 2.7289324 -14.505571 - 13.270688 3.448231 -7.0659585 4.5886116 -4.466099 - -0.296428 -11.463529 -2.6076477 14.110243 -6.9725137 - -1.9962958 2.7119343 19.391657 0.01961198 14.607133 - -1.6695905 -4.391516 1.3131028 -6.670972 -5.888604 - 12.0612335 5.9285784 3.3715196 1.492534 10.723728 - -0.95514804 -12.085431 ] + [ 2.5247195 5.119042 -4.335273 4.4583654 5.047907 + 3.5059214 1.6159848 0.49364898 -11.6899185 -3.1014526 + -5.6589785 -0.42684984 2.674276 -11.937654 6.2248464 + -10.776924 -5.694543 1.112041 1.5709964 1.0961034 + 1.3976512 2.324352 1.339981 5.279319 13.734659 + -2.5753925 13.651442 -2.2357535 5.1575427 -3.251567 + 1.4023279 6.1191974 -6.0845175 -1.3646189 -2.6789894 + -15.220778 9.779349 -9.411551 -6.388947 6.8313975 + -9.245996 0.31196198 2.5509644 -4.413065 6.1649427 + 6.793837 2.6328635 8.620976 3.4832475 0.52491665 + 2.9115407 5.8392377 0.6702376 -3.2726715 2.6694255 + 16.91701 -5.5811176 0.23362345 -4.5573606 -11.801059 + 14.728292 -0.5198082 -3.999922 7.0927105 -7.0459595 + -5.4389 -0.46420583 -5.1085467 10.376568 -8.889225 + -0.37705845 -1.659806 2.6731026 -7.1909504 1.4608804 + -2.163136 -0.17949677 4.0241547 0.11319201 0.601279 + 2.039692 3.1910992 -11.649526 -8.121584 -4.8707457 + 0.3851982 1.4231744 -2.3321972 0.99332285 14.121717 + 5.899413 0.7384519 -17.760096 10.555021 4.1366534 + -0.3391071 -0.20792882 3.208204 0.8847948 -8.721497 + -6.432868 13.006379 4.8956 -9.155822 -1.9441519 + 5.7815638 -2.066733 10.425042 -0.8802383 -2.4314315 + -9.869258 0.35095334 -5.3549943 2.1076174 
-8.290468 + 8.4433365 -4.689333 9.334139 -2.172678 -3.0250976 + 8.394216 -3.2110903 -7.93868 2.3960824 -2.3213403 + -1.4963245 -3.476059 4.132903 -10.893354 4.362673 + -0.45456508 10.258634 -1.1655927 -6.7799754 0.22885278 + -4.399287 2.333433 -4.84745 -4.2752337 -1.3577863 + -1.0685898 9.505196 7.3062205 0.08708266 12.927811 + -9.57974 1.3936648 -1.9444873 5.776769 15.251903 + 10.6118355 -1.4903594 -9.535318 -3.6553776 -1.6699586 + -0.5933151 7.600357 -4.8815503 -8.698617 -15.855757 + 0.25632986 -7.2235737 0.9506656 0.7128582 -9.051738 + 8.74869 -1.6426028 -6.5762258 2.506905 -6.7431564 + 5.129912 -12.189555 -3.6435068 12.068113 -6.0059533 + -2.3535995 2.9014351 22.3082 -1.5563312 13.193291 + 2.7583609 -7.468798 1.3407065 -4.599617 -6.2345777 + 10.7689295 7.137627 5.099476 0.3473359 9.647881 + -2.0484571 -5.8549366 ] # get the score between enroll and test - Eembeddings Score: 0.4292638301849365 + Eembeddings Score: 0.45332613587379456 ``` ### 4.预训练模型 diff --git a/docs/source/released_model.md b/docs/source/released_model.md index 74435ae1a..3231fecd4 100644 --- a/docs/source/released_model.md +++ b/docs/source/released_model.md @@ -82,7 +82,7 @@ PANN | ESC-50 |[pann-esc50](../../examples/esc50/cls0)|[esc50_cnn6.tar.gz](https Model Type | Dataset| Example Link | Pretrained Models | Static Models :-------------:| :------------:| :-----: | :-----: | :-----: -PANN | VoxCeleb| [voxceleb_ecapatdnn](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/voxceleb/sv0) | [ecapatdnn.tar.gz](https://paddlespeech.bj.bcebos.com/vector/voxceleb/sv0_ecapa_tdnn_voxceleb12_ckpt_0_2_0.tar.gz) | - +PANN | VoxCeleb| [voxceleb_ecapatdnn](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/voxceleb/sv0) | [ecapatdnn.tar.gz](https://paddlespeech.bj.bcebos.com/vector/voxceleb/sv0_ecapa_tdnn_voxceleb12_ckpt_0_2_1.tar.gz) | - ## Punctuation Restoration Models Model Type | Dataset| Example Link | Pretrained Models diff --git a/examples/voxceleb/sv0/README.md 
b/examples/voxceleb/sv0/README.md index 418102b4f..26c95aca9 100644 --- a/examples/voxceleb/sv0/README.md +++ b/examples/voxceleb/sv0/README.md @@ -141,11 +141,11 @@ using the `tar` scripts to unpack the model and then you can use the script to t For example: ``` -wget https://paddlespeech.bj.bcebos.com/vector/voxceleb/sv0_ecapa_tdnn_voxceleb12_ckpt_0_2_0.tar.gz -tar -xvf sv0_ecapa_tdnn_voxceleb12_ckpt_0_2_0.tar.gz +wget https://paddlespeech.bj.bcebos.com/vector/voxceleb/sv0_ecapa_tdnn_voxceleb12_ckpt_0_2_1.tar.gz +tar -xvf sv0_ecapa_tdnn_voxceleb12_ckpt_0_2_1.tar.gz source path.sh # If you have processed the data and get the manifest file, you can skip the following 2 steps -CUDA_VISIBLE_DEVICES= bash ./local/test.sh ./data sv0_ecapa_tdnn_voxceleb12_ckpt_0_1_2/model/ conf/ecapa_tdnn.yaml +CUDA_VISIBLE_DEVICES= bash ./local/test.sh ./data sv0_ecapa_tdnn_voxceleb12_ckpt_0_2_1/model/ conf/ecapa_tdnn.yaml ``` The performance of the released models are shown in [this](./RESULTS.md) diff --git a/examples/voxceleb/sv0/RESULT.md b/examples/voxceleb/sv0/RESULT.md index 3a3f67d09..a1d2a1812 100644 --- a/examples/voxceleb/sv0/RESULT.md +++ b/examples/voxceleb/sv0/RESULT.md @@ -5,3 +5,4 @@ | Model | Number of Params | Release | Config | dim | Test set | Cosine | Cosine + S-Norm | | --- | --- | --- | --- | --- | --- | --- | ---- | | ECAPA-TDNN | 85M | 0.2.0 | conf/ecapa_tdnn.yaml |192 | test | 1.02 | 0.95 | +| ECAPA-TDNN | 85M | 0.2.1 | conf/ecapa_tdnn.yaml | 192 | test | 0.8188 | 0.7815| diff --git a/examples/voxceleb/sv0/conf/ecapa_tdnn.yaml b/examples/voxceleb/sv0/conf/ecapa_tdnn.yaml index 3e3a13072..b7b71d77d 100644 --- a/examples/voxceleb/sv0/conf/ecapa_tdnn.yaml +++ b/examples/voxceleb/sv0/conf/ecapa_tdnn.yaml @@ -59,3 +59,11 @@ global_embedding_norm: True embedding_mean_norm: True embedding_std_norm: False +########################################### +# score-norm # +########################################### +score_norm: s-norm +cohort_size: 20000 # amount of 
imposter utterances in normalization cohort +n_train_snts: 400000 # used for normalization stats + + diff --git a/examples/voxceleb/sv0/conf/ecapa_tdnn_small.yaml b/examples/voxceleb/sv0/conf/ecapa_tdnn_small.yaml index 5925e5730..40498c874 100644 --- a/examples/voxceleb/sv0/conf/ecapa_tdnn_small.yaml +++ b/examples/voxceleb/sv0/conf/ecapa_tdnn_small.yaml @@ -58,3 +58,10 @@ global_embedding_norm: True embedding_mean_norm: True embedding_std_norm: False +########################################### +# score-norm # +########################################### +score_norm: s-norm +cohort_size: 20000 # amount of imposter utterances in normalization cohort +n_train_snts: 400000 # used for normalization stats + diff --git a/paddlespeech/cli/vector/pretrained_models.py b/paddlespeech/cli/vector/pretrained_models.py index 686a22d8f..4d1d3a048 100644 --- a/paddlespeech/cli/vector/pretrained_models.py +++ b/paddlespeech/cli/vector/pretrained_models.py @@ -19,9 +19,9 @@ pretrained_models = { # "paddlespeech vector --task spk --model ecapatdnn_voxceleb12-16k --sr 16000 --input ./input.wav" "ecapatdnn_voxceleb12-16k": { 'url': - 'https://paddlespeech.bj.bcebos.com/vector/voxceleb/sv0_ecapa_tdnn_voxceleb12_ckpt_0_2_0.tar.gz', + 'https://paddlespeech.bj.bcebos.com/vector/voxceleb/sv0_ecapa_tdnn_voxceleb12_ckpt_0_2_1.tar.gz', 'md5': - 'cc33023c54ab346cd318408f43fcaf95', + '67c7ff8885d5246bd16e0f5ac1cba99f', 'cfg_path': 'conf/model.yaml', # the yaml config path 'ckpt_path': diff --git a/paddlespeech/server/engine/acs/python/acs_engine.py b/paddlespeech/server/engine/acs/python/acs_engine.py index 30deeeb50..d52852dcf 100644 --- a/paddlespeech/server/engine/acs/python/acs_engine.py +++ b/paddlespeech/server/engine/acs/python/acs_engine.py @@ -16,6 +16,7 @@ import json import os import re +import numpy as np import paddle import soundfile import websocket @@ -44,11 +45,7 @@ class ACSEngine(BaseEngine): logger.info("Init the acs engine") try: self.config = config - if 
self.config.device: - self.device = self.config.device - else: - self.device = paddle.get_device() - + self.device = self.config.get("device", paddle.get_device()) paddle.set_device(self.device) logger.info(f"ACS Engine set the device: {self.device}") @@ -116,11 +113,17 @@ class ACSEngine(BaseEngine): logger.info("client receive msg={}".format(msg)) # send the total audio data - samples, sample_rate = soundfile.read(audio_data, dtype='int16') - ws.send_binary(samples.tobytes()) - msg = ws.recv() - msg = json.loads(msg) - logger.info(f"audio result: {msg}") + for chunk_data in self.read_wave(audio_data): + ws.send_binary(chunk_data.tobytes()) + msg = ws.recv() + msg = json.loads(msg) + logger.info(f"audio result: {msg}") + # samples, sample_rate = soundfile.read(audio_data, dtype='int16') + + # ws.send_binary(samples.tobytes()) + # msg = ws.recv() + # msg = json.loads(msg) + # logger.info(f"audio result: {msg}") # 3. send chunk audio data to engine logger.info("send the end signal") @@ -142,6 +145,39 @@ class ACSEngine(BaseEngine): return msg + def read_wave(self, audio_data: str): + """read the audio file from specific wavfile path + + Args: + audio_data (str): the audio data, + we assume that audio sample rate matches the model + + Yields: + numpy.array: the small package audio pcm data + """ + samples, sample_rate = soundfile.read(audio_data, dtype='int16') + x_len = len(samples) + assert sample_rate == 16000 + + chunk_size = int(85 * sample_rate / 1000) # 85ms, sample_rate = 16kHz + + if x_len % chunk_size != 0: + padding_len_x = chunk_size - x_len % chunk_size + else: + padding_len_x = 0 + + padding = np.zeros((padding_len_x), dtype=samples.dtype) + padded_x = np.concatenate([samples, padding], axis=0) + + assert (x_len + padding_len_x) % chunk_size == 0 + num_chunk = (x_len + padding_len_x) / chunk_size + num_chunk = int(num_chunk) + for i in range(0, num_chunk): + start = i * chunk_size + end = start + chunk_size + x_chunk = padded_x[start:end] + yield
x_chunk + def get_macthed_word(self, msg): """Get the matched info in msg diff --git a/paddlespeech/server/tests/vector/http_client.py b/paddlespeech/server/tests/vector/http_client.py new file mode 100644 index 000000000..49f2adf7c --- /dev/null +++ b/paddlespeech/server/tests/vector/http_client.py @@ -0,0 +1,59 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the +import base64 +import json +import time + +import requests + + +def readwav2base64(wav_file): + """ + read wave file and convert to base64 string + """ + with open(wav_file, 'rb') as f: + base64_bytes = base64.b64encode(f.read()) + base64_string = base64_bytes.decode('utf-8') + return base64_string + + +def main(): + """ + main func + """ + url = "http://127.0.0.1:8090/paddlespeech/asr" + + # start Timestamp + time_start = time.time() + + test_audio_dir = "./16_audio.wav" + audio = readwav2base64(test_audio_dir) + + data = { + "audio": audio, + "audio_format": "wav", + "sample_rate": 16000, + "lang": "zh_cn", + } + + r = requests.post(url=url, data=json.dumps(data)) + + # ending Timestamp + time_end = time.time() + print('time cost', time_end - time_start, 's') + + print(r.json()) + + +if __name__ == "__main__": + main() From a83374a78755079c0990b02d0a1d60e198a5d167 Mon Sep 17 00:00:00 2001 From: xiongxinlei Date: Wed, 25 May 2022 12:20:44 +0800 Subject: [PATCH 2/5] update the vector readme, test=doc --- demos/speaker_verification/README.md | 78 +++++++++---------- demos/speaker_verification/README_cn.md | 78
+++++++++---------- demos/speech_server/README.md | 35 +++++---- demos/speech_server/README_cn.md | 37 ++++----- .../server/tests/vector/http_client.py | 59 -------------- 5 files changed, 115 insertions(+), 172 deletions(-) delete mode 100644 paddlespeech/server/tests/vector/http_client.py diff --git a/demos/speaker_verification/README.md b/demos/speaker_verification/README.md index a7d0f819d..63dc9294e 100644 --- a/demos/speaker_verification/README.md +++ b/demos/speaker_verification/README.md @@ -127,45 +127,45 @@ wget -c https://paddlespeech.bj.bcebos.com/vector/audio/85236145389.wav ```bash # Vector Result: Audio embedding Result: - [ 1.4217498 5.626253 -5.342073 1.1773866 3.308055 - 1.756596 5.167894 10.80636 -3.8226728 -5.6141334 - 2.623845 -0.8072968 1.9635103 -7.3128724 0.01103897 - -9.723131 0.6619743 -6.976803 10.213478 7.494748 - 2.9105635 3.8949256 3.7999806 7.1061673 16.905321 - -7.1493764 8.733103 3.4230042 -4.831653 -11.403367 - 11.232214 7.1274667 -4.2828417 2.452362 -5.130748 - -18.177666 -2.6116815 -11.000337 -6.7314315 1.6564683 - 0.7618269 1.1253023 -2.083836 4.725744 -8.782597 - -3.539873 3.814236 5.1420674 2.162061 4.096431 - -6.4162116 12.747448 1.9429878 -15.152943 6.417416 - 16.097002 -9.716668 -1.9920526 -3.3649497 -1.871939 - 11.567354 3.69788 11.258265 7.442363 9.183411 - 4.5281515 -1.2417862 4.3959084 6.6727695 5.8898783 - 7.627124 -0.66919386 -11.889693 -9.208865 -7.4274073 - -3.7776625 6.917234 -9.848748 -2.0944717 -5.135116 - 0.49563864 9.317534 -5.9141874 -1.8098574 -0.11738578 - -7.169265 -1.0578263 -5.7216787 -5.1173844 16.137651 - -4.473626 7.6624317 -0.55381083 9.631587 -6.4704556 - -8.548508 4.3716145 -0.79702514 4.478997 -2.9758704 - 3.272176 2.8382776 5.134597 -9.190781 -0.5657382 - -4.8745747 2.3165567 -5.984303 -2.1798875 0.35541576 - -0.31784213 9.493548 2.1144536 4.358092 -12.089823 - 8.451689 -7.925461 4.6242585 4.4289427 18.692003 - -2.6204622 -5.149185 -0.35821092 8.488551 4.981496 - -9.32683 -2.2544234 6.6417594 
1.2119585 10.977129 - 16.555033 3.3238444 9.551863 -1.6676947 -0.79539716 - -8.605674 -0.47356385 2.6741948 -5.359179 -2.6673796 - 0.66607 15.443222 4.740594 -3.4725387 11.592567 - -2.054497 1.7361217 -8.265324 -9.30447 5.4068313 - -1.5180256 -7.746615 -6.089606 0.07112726 -0.34904733 - -8.649895 -9.998958 -2.564841 -0.53999114 2.601808 - -0.31927416 -1.8815292 -2.07215 -3.4105783 -8.2998085 - 1.483641 -15.365992 -8.288208 3.8847756 -3.4876456 - 7.3629923 0.4657332 3.132599 12.438889 -1.8337058 - 4.532936 2.7264361 10.145339 -6.521951 2.897153 - -3.3925855 5.079156 7.759716 4.677565 5.8457737 - 2.402413 7.7071047 3.9711342 -6.390043 6.1268735 - -3.7760346 -11.118123 ] + [ -1.3251206 7.8606825 -4.620626 0.3000721 2.2648535 + -1.1931441 3.0647137 7.673595 -6.0044727 -12.02426 + -1.9496069 3.1269536 1.618838 -7.6383104 -1.2299773 + -12.338331 2.1373026 -5.3957124 9.717328 5.6752305 + 3.7805123 3.0597172 3.429692 8.97601 13.174125 + -0.53132284 8.9424715 4.46511 -4.4262476 -9.726503 + 8.399328 7.2239175 -7.435854 2.9441683 -4.3430395 + -13.886965 -1.6346735 -10.9027405 -5.311245 3.8007221 + 3.8976038 -2.1230774 -2.3521194 4.151031 -7.4048667 + 0.13911647 2.4626107 4.9664545 0.9897574 5.4839754 + -3.3574002 10.1340065 -0.6120171 -10.403095 4.6007543 + 16.00935 -7.7836914 -4.1945305 -6.9368606 1.1789556 + 11.490801 4.2380238 9.550931 8.375046 7.5089145 + -0.65707296 -0.30051577 2.8406055 3.0828028 0.730817 + 6.148354 0.13766119 -13.424735 -7.7461405 -2.3227983 + -8.305252 2.9879124 -10.995229 0.15211068 -2.3820348 + -1.7984174 8.495629 -5.8522367 -3.755498 0.6989711 + -5.2702994 -2.6188622 -1.8828466 -4.64665 14.078544 + -0.5495333 10.579158 -3.2160501 9.349004 -4.381078 + -11.675817 -2.8630207 4.5721755 2.246612 -4.574342 + 1.8610188 2.3767874 5.6257877 -9.784078 0.64967257 + -1.4579505 0.4263264 -4.9211264 -2.454784 3.4869802 + -0.42654222 8.341269 1.356552 7.0966883 -13.102829 + 8.016734 -7.1159344 1.8699781 0.208721 14.699384 + -1.025278 -2.6107233 -2.5082312 
8.427193 6.9138527 + -6.2912464 0.6157366 2.489688 -3.4668267 9.921763 + 11.200815 -0.1966403 7.4916005 -0.62312716 -0.25848144 + -9.947997 -0.9611041 1.1649219 -2.1907122 -1.5028487 + -0.51926106 15.165954 2.4649463 -0.9980445 7.4416637 + -2.0768049 3.5896823 -7.3055434 -7.5620847 4.323335 + 0.0804418 -6.56401 -2.3148053 -1.7642345 -2.4708817 + -7.675618 -9.548878 -1.0177554 0.16986446 2.5877135 + -1.8752296 -0.36614323 -6.0493784 -2.3965611 -5.9453387 + 0.9424033 -13.155974 -7.457801 0.14658108 -3.742797 + 5.8414927 -1.2872906 5.5694313 12.57059 1.0939219 + 2.2142086 1.9181576 6.9914207 -5.888139 3.1409824 + -2.003628 2.4434285 9.973139 5.03668 2.0051203 + 2.8615603 5.860224 2.9176188 -1.6311141 2.0292206 + -4.070415 -6.831437 ] # get the test embedding Test embedding Result: [ 2.5247195 5.119042 -4.335273 4.4583654 5.047907 diff --git a/demos/speaker_verification/README_cn.md b/demos/speaker_verification/README_cn.md index 04e1aeecd..07eeac2ee 100644 --- a/demos/speaker_verification/README_cn.md +++ b/demos/speaker_verification/README_cn.md @@ -125,45 +125,45 @@ wget -c https://paddlespeech.bj.bcebos.com/vector/audio/85236145389.wav ```bash # Vector Result: Audio embedding Result: - [ 1.4217498 5.626253 -5.342073 1.1773866 3.308055 - 1.756596 5.167894 10.80636 -3.8226728 -5.6141334 - 2.623845 -0.8072968 1.9635103 -7.3128724 0.01103897 - -9.723131 0.6619743 -6.976803 10.213478 7.494748 - 2.9105635 3.8949256 3.7999806 7.1061673 16.905321 - -7.1493764 8.733103 3.4230042 -4.831653 -11.403367 - 11.232214 7.1274667 -4.2828417 2.452362 -5.130748 - -18.177666 -2.6116815 -11.000337 -6.7314315 1.6564683 - 0.7618269 1.1253023 -2.083836 4.725744 -8.782597 - -3.539873 3.814236 5.1420674 2.162061 4.096431 - -6.4162116 12.747448 1.9429878 -15.152943 6.417416 - 16.097002 -9.716668 -1.9920526 -3.3649497 -1.871939 - 11.567354 3.69788 11.258265 7.442363 9.183411 - 4.5281515 -1.2417862 4.3959084 6.6727695 5.8898783 - 7.627124 -0.66919386 -11.889693 -9.208865 -7.4274073 - -3.7776625 
6.917234 -9.848748 -2.0944717 -5.135116 - 0.49563864 9.317534 -5.9141874 -1.8098574 -0.11738578 - -7.169265 -1.0578263 -5.7216787 -5.1173844 16.137651 - -4.473626 7.6624317 -0.55381083 9.631587 -6.4704556 - -8.548508 4.3716145 -0.79702514 4.478997 -2.9758704 - 3.272176 2.8382776 5.134597 -9.190781 -0.5657382 - -4.8745747 2.3165567 -5.984303 -2.1798875 0.35541576 - -0.31784213 9.493548 2.1144536 4.358092 -12.089823 - 8.451689 -7.925461 4.6242585 4.4289427 18.692003 - -2.6204622 -5.149185 -0.35821092 8.488551 4.981496 - -9.32683 -2.2544234 6.6417594 1.2119585 10.977129 - 16.555033 3.3238444 9.551863 -1.6676947 -0.79539716 - -8.605674 -0.47356385 2.6741948 -5.359179 -2.6673796 - 0.66607 15.443222 4.740594 -3.4725387 11.592567 - -2.054497 1.7361217 -8.265324 -9.30447 5.4068313 - -1.5180256 -7.746615 -6.089606 0.07112726 -0.34904733 - -8.649895 -9.998958 -2.564841 -0.53999114 2.601808 - -0.31927416 -1.8815292 -2.07215 -3.4105783 -8.2998085 - 1.483641 -15.365992 -8.288208 3.8847756 -3.4876456 - 7.3629923 0.4657332 3.132599 12.438889 -1.8337058 - 4.532936 2.7264361 10.145339 -6.521951 2.897153 - -3.3925855 5.079156 7.759716 4.677565 5.8457737 - 2.402413 7.7071047 3.9711342 -6.390043 6.1268735 - -3.7760346 -11.118123 ] + [ -1.3251206 7.8606825 -4.620626 0.3000721 2.2648535 + -1.1931441 3.0647137 7.673595 -6.0044727 -12.02426 + -1.9496069 3.1269536 1.618838 -7.6383104 -1.2299773 + -12.338331 2.1373026 -5.3957124 9.717328 5.6752305 + 3.7805123 3.0597172 3.429692 8.97601 13.174125 + -0.53132284 8.9424715 4.46511 -4.4262476 -9.726503 + 8.399328 7.2239175 -7.435854 2.9441683 -4.3430395 + -13.886965 -1.6346735 -10.9027405 -5.311245 3.8007221 + 3.8976038 -2.1230774 -2.3521194 4.151031 -7.4048667 + 0.13911647 2.4626107 4.9664545 0.9897574 5.4839754 + -3.3574002 10.1340065 -0.6120171 -10.403095 4.6007543 + 16.00935 -7.7836914 -4.1945305 -6.9368606 1.1789556 + 11.490801 4.2380238 9.550931 8.375046 7.5089145 + -0.65707296 -0.30051577 2.8406055 3.0828028 0.730817 + 6.148354 0.13766119 
-13.424735 -7.7461405 -2.3227983 + -8.305252 2.9879124 -10.995229 0.15211068 -2.3820348 + -1.7984174 8.495629 -5.8522367 -3.755498 0.6989711 + -5.2702994 -2.6188622 -1.8828466 -4.64665 14.078544 + -0.5495333 10.579158 -3.2160501 9.349004 -4.381078 + -11.675817 -2.8630207 4.5721755 2.246612 -4.574342 + 1.8610188 2.3767874 5.6257877 -9.784078 0.64967257 + -1.4579505 0.4263264 -4.9211264 -2.454784 3.4869802 + -0.42654222 8.341269 1.356552 7.0966883 -13.102829 + 8.016734 -7.1159344 1.8699781 0.208721 14.699384 + -1.025278 -2.6107233 -2.5082312 8.427193 6.9138527 + -6.2912464 0.6157366 2.489688 -3.4668267 9.921763 + 11.200815 -0.1966403 7.4916005 -0.62312716 -0.25848144 + -9.947997 -0.9611041 1.1649219 -2.1907122 -1.5028487 + -0.51926106 15.165954 2.4649463 -0.9980445 7.4416637 + -2.0768049 3.5896823 -7.3055434 -7.5620847 4.323335 + 0.0804418 -6.56401 -2.3148053 -1.7642345 -2.4708817 + -7.675618 -9.548878 -1.0177554 0.16986446 2.5877135 + -1.8752296 -0.36614323 -6.0493784 -2.3965611 -5.9453387 + 0.9424033 -13.155974 -7.457801 0.14658108 -3.742797 + 5.8414927 -1.2872906 5.5694313 12.57059 1.0939219 + 2.2142086 1.9181576 6.9914207 -5.888139 3.1409824 + -2.003628 2.4434285 9.973139 5.03668 2.0051203 + 2.8615603 5.860224 2.9176188 -1.6311141 2.0292206 + -4.070415 -6.831437 ] # get the test embedding Test embedding Result: [ 2.5247195 5.119042 -4.335273 4.4583654 5.047907 diff --git a/demos/speech_server/README.md b/demos/speech_server/README.md index a03a43dff..14a88f078 100644 --- a/demos/speech_server/README.md +++ b/demos/speech_server/README.md @@ -274,12 +274,12 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee Output: ```bash - [2022-05-08 00:18:44,249] [ INFO] - vector http client start - [2022-05-08 00:18:44,250] [ INFO] - the input audio: 85236145389.wav - [2022-05-08 00:18:44,250] [ INFO] - endpoint: http://127.0.0.1:8090/paddlespeech/vector - [2022-05-08 00:18:44,250] [ INFO] - http://127.0.0.1:8590/paddlespeech/vector - 
[2022-05-08 00:18:44,406] [ INFO] - The vector: {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'vec': [1.421751856803894, 5.626245498657227, -5.342077255249023, 1.1773887872695923, 3.3080549240112305, 1.7565933465957642, 5.167886257171631, 10.806358337402344, -3.8226819038391113, -5.614140033721924, 2.6238479614257812, -0.8072972893714905, 1.9635076522827148, -7.312870025634766, 0.011035939678549767, -9.723129272460938, 0.6619706153869629, -6.976806163787842, 10.213476181030273, 7.494769096374512, 2.9105682373046875, 3.8949244022369385, 3.799983501434326, 7.106168746948242, 16.90532875061035, -7.149388313293457, 8.733108520507812, 3.423006296157837, -4.831653594970703, -11.403363227844238, 11.232224464416504, 7.127461910247803, -4.282842636108398, 2.452359437942505, -5.130749702453613, -18.17766761779785, -2.6116831302642822, -11.000344276428223, -6.731433391571045, 1.6564682722091675, 0.7618281245231628, 1.125300407409668, -2.0838370323181152, 4.725743293762207, -8.782588005065918, -3.5398752689361572, 3.8142364025115967, 5.142068862915039, 2.1620609760284424, 4.09643030166626, -6.416214942932129, 12.747446060180664, 1.9429892301559448, -15.15294361114502, 6.417416095733643, 16.09701156616211, -9.716667175292969, -1.9920575618743896, -3.36494779586792, -1.8719440698623657, 11.567351341247559, 3.6978814601898193, 11.258262634277344, 7.442368507385254, 9.183408737182617, 4.528149127960205, -1.2417854070663452, 4.395912170410156, 6.6727728843688965, 5.88988733291626, 7.627128601074219, -0.6691966652870178, -11.889698028564453, -9.20886516571045, -7.42740535736084, -3.777663230895996, 6.917238712310791, -9.848755836486816, -2.0944676399230957, -5.1351165771484375, 0.4956451654434204, 9.317537307739258, -5.914181232452393, -1.809860348701477, -0.11738915741443634, -7.1692705154418945, -1.057827353477478, -5.721670627593994, -5.117385387420654, 16.13765525817871, -4.473617076873779, 7.6624321937561035, -0.55381840467453, 
9.631585121154785, -6.470459461212158, -8.548508644104004, 4.371616840362549, -0.7970245480537415, 4.4789886474609375, -2.975860834121704, 3.2721822261810303, 2.838287830352783, 5.134591102600098, -9.19079875946045, -0.5657302737236023, -4.8745832443237305, 2.3165574073791504, -5.984319686889648, -2.1798853874206543, 0.3554139733314514, -0.3178512752056122, 9.493552207946777, 2.1144471168518066, 4.358094692230225, -12.089824676513672, 8.451693534851074, -7.925466537475586, 4.624246597290039, 4.428936958312988, 18.69200897216797, -2.6204581260681152, -5.14918851852417, -0.3582090139389038, 8.488558769226074, 4.98148775100708, -9.326835632324219, -2.2544219493865967, 6.641760349273682, 1.2119598388671875, 10.977124214172363, 16.555034637451172, 3.3238420486450195, 9.551861763000488, -1.6676981449127197, -0.7953944206237793, -8.605667114257812, -0.4735655188560486, 2.674196243286133, -5.359177112579346, -2.66738224029541, 0.6660683155059814, 15.44322681427002, 4.740593433380127, -3.472534418106079, 11.592567443847656, -2.0544962882995605, 1.736127495765686, -8.265326499938965, -9.30447769165039, 5.406829833984375, -1.518022894859314, -7.746612548828125, -6.089611053466797, 0.07112743705511093, -0.3490503430366516, -8.64989185333252, -9.998957633972168, -2.564845085144043, -0.5399947762489319, 2.6018123626708984, -0.3192799389362335, -1.8815255165100098, -2.0721492767333984, -3.410574436187744, -8.29980754852295, 1.483638048171997, -15.365986824035645, -8.288211822509766, 3.884779930114746, -3.4876468181610107, 7.362999439239502, 0.4657334089279175, 3.1326050758361816, 12.438895225524902, -1.8337041139602661, 4.532927989959717, 2.7264339923858643, 10.14534854888916, -6.521963596343994, 2.897155523300171, -3.392582654953003, 5.079153060913086, 7.7597246170043945, 4.677570819854736, 5.845779895782471, 2.402411460876465, 7.7071051597595215, 3.9711380004882812, -6.39003849029541, 6.12687873840332, -3.776029348373413, -11.118121147155762]}} - [2022-05-08 00:18:44,406] [ 
INFO] - Response time 0.156481 s. + [2022-05-25 12:25:36,165] [ INFO] - vector http client start + [2022-05-25 12:25:36,165] [ INFO] - the input audio: 85236145389.wav + [2022-05-25 12:25:36,165] [ INFO] - endpoint: http://127.0.0.1:8790/paddlespeech/vector + [2022-05-25 12:25:36,166] [ INFO] - http://127.0.0.1:8790/paddlespeech/vector + [2022-05-25 12:25:36,324] [ INFO] - The vector: {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'vec': [-1.3251205682754517, 7.860682487487793, -4.620625972747803, 0.3000721037387848, 2.2648534774780273, -1.1931440830230713, 3.064713716506958, 7.673594951629639, -6.004472732543945, -12.024259567260742, -1.9496068954467773, 3.126953601837158, 1.6188379526138306, -7.638310432434082, -1.2299772500991821, -12.33833122253418, 2.1373026371002197, -5.395712375640869, 9.717328071594238, 5.675230503082275, 3.7805123329162598, 3.0597171783447266, 3.429692029953003, 8.9760103225708, 13.174124717712402, -0.5313228368759155, 8.942471504211426, 4.465109825134277, -4.426247596740723, -9.726503372192383, 8.399328231811523, 7.223917484283447, -7.435853958129883, 2.9441683292388916, -4.343039512634277, -13.886964797973633, -1.6346734762191772, -10.902740478515625, -5.311244964599609, 3.800722122192383, 3.897603750228882, -2.123077392578125, -2.3521194458007812, 4.151031017303467, -7.404866695404053, 0.13911646604537964, 2.4626107215881348, 4.96645450592041, 0.9897574186325073, 5.483975410461426, -3.3574001789093018, 10.13400650024414, -0.6120170950889587, -10.403095245361328, 4.600754261016846, 16.009349822998047, -7.78369140625, -4.194530487060547, -6.93686056137085, 1.1789555549621582, 11.490800857543945, 4.23802375793457, 9.550930976867676, 8.375045776367188, 7.508914470672607, -0.6570729613304138, -0.3005157709121704, 2.8406054973602295, 3.0828027725219727, 0.7308170199394226, 6.1483540534973145, 0.1376611888408661, -13.424735069274902, -7.746140480041504, -2.322798252105713, -8.305252075195312, 2.98791241645813, 
-10.99522876739502, 0.15211068093776703, -2.3820347785949707, -1.7984174489974976, 8.49562931060791, -5.852236747741699, -3.755497932434082, 0.6989710927009583, -5.270299434661865, -2.6188621520996094, -1.8828465938568115, -4.6466498374938965, 14.078543663024902, -0.5495333075523376, 10.579157829284668, -3.216050148010254, 9.349003791809082, -4.381077766418457, -11.675816535949707, -2.863020658493042, 4.5721755027771, 2.246612071990967, -4.574341773986816, 1.8610187768936157, 2.3767874240875244, 5.625787734985352, -9.784077644348145, 0.6496725678443909, -1.457950472831726, 0.4263263940811157, -4.921126365661621, -2.4547839164733887, 3.4869801998138428, -0.4265422224998474, 8.341268539428711, 1.356552004814148, 7.096688270568848, -13.102828979492188, 8.01673412322998, -7.115934371948242, 1.8699780702590942, 0.20872099697589874, 14.699383735656738, -1.0252779722213745, -2.6107232570648193, -2.5082311630249023, 8.427192687988281, 6.913852691650391, -6.29124641418457, 0.6157366037368774, 2.489687919616699, -3.4668266773223877, 9.92176342010498, 11.200815200805664, -0.19664029777050018, 7.491600513458252, -0.6231271624565125, -0.2584814429283142, -9.947997093200684, -0.9611040949821472, 1.1649218797683716, -2.1907122135162354, -1.502848744392395, -0.5192610621452332, 15.165953636169434, 2.4649462699890137, -0.998044490814209, 7.44166374206543, -2.0768048763275146, 3.5896823406219482, -7.305543422698975, -7.562084674835205, 4.32333517074585, 0.08044180274009705, -6.564010143280029, -2.314805269241333, -1.7642345428466797, -2.470881700515747, -7.6756181716918945, -9.548877716064453, -1.017755389213562, 0.1698644608259201, 2.5877134799957275, -1.8752295970916748, -0.36614322662353516, -6.049378395080566, -2.3965611457824707, -5.945338726043701, 0.9424033164978027, -13.155974388122559, -7.45780086517334, 0.14658108353614807, -3.7427968978881836, 5.841492652893066, -1.2872905731201172, 5.569431304931641, 12.570590019226074, 1.0939218997955322, 2.2142086029052734, 
1.9181575775146484, 6.991420745849609, -5.888138771057129, 3.1409823894500732, -2.0036280155181885, 2.4434285163879395, 9.973138809204102, 5.036680221557617, 2.005120277404785, 2.861560344696045, 5.860223770141602, 2.917618751525879, -1.63111412525177, 2.0292205810546875, -4.070415019989014, -6.831437110900879]}} + [2022-05-25 12:25:36,324] [ INFO] - Response time 0.159053 s. ``` * Python API @@ -299,7 +299,7 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee Output: ``` bash - {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'vec': [1.421751856803894, 5.626245498657227, -5.342077255249023, 1.1773887872695923, 3.3080549240112305, 1.7565933465957642, 5.167886257171631, 10.806358337402344, -3.8226819038391113, -5.614140033721924, 2.6238479614257812, -0.8072972893714905, 1.9635076522827148, -7.312870025634766, 0.011035939678549767, -9.723129272460938, 0.6619706153869629, -6.976806163787842, 10.213476181030273, 7.494769096374512, 2.9105682373046875, 3.8949244022369385, 3.799983501434326, 7.106168746948242, 16.90532875061035, -7.149388313293457, 8.733108520507812, 3.423006296157837, -4.831653594970703, -11.403363227844238, 11.232224464416504, 7.127461910247803, -4.282842636108398, 2.452359437942505, -5.130749702453613, -18.17766761779785, -2.6116831302642822, -11.000344276428223, -6.731433391571045, 1.6564682722091675, 0.7618281245231628, 1.125300407409668, -2.0838370323181152, 4.725743293762207, -8.782588005065918, -3.5398752689361572, 3.8142364025115967, 5.142068862915039, 2.1620609760284424, 4.09643030166626, -6.416214942932129, 12.747446060180664, 1.9429892301559448, -15.15294361114502, 6.417416095733643, 16.09701156616211, -9.716667175292969, -1.9920575618743896, -3.36494779586792, -1.8719440698623657, 11.567351341247559, 3.6978814601898193, 11.258262634277344, 7.442368507385254, 9.183408737182617, 4.528149127960205, -1.2417854070663452, 4.395912170410156, 6.6727728843688965, 5.88988733291626, 
7.627128601074219, -0.6691966652870178, -11.889698028564453, -9.20886516571045, -7.42740535736084, -3.777663230895996, 6.917238712310791, -9.848755836486816, -2.0944676399230957, -5.1351165771484375, 0.4956451654434204, 9.317537307739258, -5.914181232452393, -1.809860348701477, -0.11738915741443634, -7.1692705154418945, -1.057827353477478, -5.721670627593994, -5.117385387420654, 16.13765525817871, -4.473617076873779, 7.6624321937561035, -0.55381840467453, 9.631585121154785, -6.470459461212158, -8.548508644104004, 4.371616840362549, -0.7970245480537415, 4.4789886474609375, -2.975860834121704, 3.2721822261810303, 2.838287830352783, 5.134591102600098, -9.19079875946045, -0.5657302737236023, -4.8745832443237305, 2.3165574073791504, -5.984319686889648, -2.1798853874206543, 0.3554139733314514, -0.3178512752056122, 9.493552207946777, 2.1144471168518066, 4.358094692230225, -12.089824676513672, 8.451693534851074, -7.925466537475586, 4.624246597290039, 4.428936958312988, 18.69200897216797, -2.6204581260681152, -5.14918851852417, -0.3582090139389038, 8.488558769226074, 4.98148775100708, -9.326835632324219, -2.2544219493865967, 6.641760349273682, 1.2119598388671875, 10.977124214172363, 16.555034637451172, 3.3238420486450195, 9.551861763000488, -1.6676981449127197, -0.7953944206237793, -8.605667114257812, -0.4735655188560486, 2.674196243286133, -5.359177112579346, -2.66738224029541, 0.6660683155059814, 15.44322681427002, 4.740593433380127, -3.472534418106079, 11.592567443847656, -2.0544962882995605, 1.736127495765686, -8.265326499938965, -9.30447769165039, 5.406829833984375, -1.518022894859314, -7.746612548828125, -6.089611053466797, 0.07112743705511093, -0.3490503430366516, -8.64989185333252, -9.998957633972168, -2.564845085144043, -0.5399947762489319, 2.6018123626708984, -0.3192799389362335, -1.8815255165100098, -2.0721492767333984, -3.410574436187744, -8.29980754852295, 1.483638048171997, -15.365986824035645, -8.288211822509766, 3.884779930114746, -3.4876468181610107, 
7.362999439239502, 0.4657334089279175, 3.1326050758361816, 12.438895225524902, -1.8337041139602661, 4.532927989959717, 2.7264339923858643, 10.14534854888916, -6.521963596343994, 2.897155523300171, -3.392582654953003, 5.079153060913086, 7.7597246170043945, 4.677570819854736, 5.845779895782471, 2.402411460876465, 7.7071051597595215, 3.9711380004882812, -6.39003849029541, 6.12687873840332, -3.776029348373413, -11.118121147155762]}} + {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'vec': [-1.3251205682754517, 7.860682487487793, -4.620625972747803, 0.3000721037387848, 2.2648534774780273, -1.1931440830230713, 3.064713716506958, 7.673594951629639, -6.004472732543945, -12.024259567260742, -1.9496068954467773, 3.126953601837158, 1.6188379526138306, -7.638310432434082, -1.2299772500991821, -12.33833122253418, 2.1373026371002197, -5.395712375640869, 9.717328071594238, 5.675230503082275, 3.7805123329162598, 3.0597171783447266, 3.429692029953003, 8.9760103225708, 13.174124717712402, -0.5313228368759155, 8.942471504211426, 4.465109825134277, -4.426247596740723, -9.726503372192383, 8.399328231811523, 7.223917484283447, -7.435853958129883, 2.9441683292388916, -4.343039512634277, -13.886964797973633, -1.6346734762191772, -10.902740478515625, -5.311244964599609, 3.800722122192383, 3.897603750228882, -2.123077392578125, -2.3521194458007812, 4.151031017303467, -7.404866695404053, 0.13911646604537964, 2.4626107215881348, 4.96645450592041, 0.9897574186325073, 5.483975410461426, -3.3574001789093018, 10.13400650024414, -0.6120170950889587, -10.403095245361328, 4.600754261016846, 16.009349822998047, -7.78369140625, -4.194530487060547, -6.93686056137085, 1.1789555549621582, 11.490800857543945, 4.23802375793457, 9.550930976867676, 8.375045776367188, 7.508914470672607, -0.6570729613304138, -0.3005157709121704, 2.8406054973602295, 3.0828027725219727, 0.7308170199394226, 6.1483540534973145, 0.1376611888408661, -13.424735069274902, -7.746140480041504, 
-2.322798252105713, -8.305252075195312, 2.98791241645813, -10.99522876739502, 0.15211068093776703, -2.3820347785949707, -1.7984174489974976, 8.49562931060791, -5.852236747741699, -3.755497932434082, 0.6989710927009583, -5.270299434661865, -2.6188621520996094, -1.8828465938568115, -4.6466498374938965, 14.078543663024902, -0.5495333075523376, 10.579157829284668, -3.216050148010254, 9.349003791809082, -4.381077766418457, -11.675816535949707, -2.863020658493042, 4.5721755027771, 2.246612071990967, -4.574341773986816, 1.8610187768936157, 2.3767874240875244, 5.625787734985352, -9.784077644348145, 0.6496725678443909, -1.457950472831726, 0.4263263940811157, -4.921126365661621, -2.4547839164733887, 3.4869801998138428, -0.4265422224998474, 8.341268539428711, 1.356552004814148, 7.096688270568848, -13.102828979492188, 8.01673412322998, -7.115934371948242, 1.8699780702590942, 0.20872099697589874, 14.699383735656738, -1.0252779722213745, -2.6107232570648193, -2.5082311630249023, 8.427192687988281, 6.913852691650391, -6.29124641418457, 0.6157366037368774, 2.489687919616699, -3.4668266773223877, 9.92176342010498, 11.200815200805664, -0.19664029777050018, 7.491600513458252, -0.6231271624565125, -0.2584814429283142, -9.947997093200684, -0.9611040949821472, 1.1649218797683716, -2.1907122135162354, -1.502848744392395, -0.5192610621452332, 15.165953636169434, 2.4649462699890137, -0.998044490814209, 7.44166374206543, -2.0768048763275146, 3.5896823406219482, -7.305543422698975, -7.562084674835205, 4.32333517074585, 0.08044180274009705, -6.564010143280029, -2.314805269241333, -1.7642345428466797, -2.470881700515747, -7.6756181716918945, -9.548877716064453, -1.017755389213562, 0.1698644608259201, 2.5877134799957275, -1.8752295970916748, -0.36614322662353516, -6.049378395080566, -2.3965611457824707, -5.945338726043701, 0.9424033164978027, -13.155974388122559, -7.45780086517334, 0.14658108353614807, -3.7427968978881836, 5.841492652893066, -1.2872905731201172, 5.569431304931641, 
12.570590019226074, 1.0939218997955322, 2.2142086029052734, 1.9181575775146484, 6.991420745849609, -5.888138771057129, 3.1409823894500732, -2.0036280155181885, 2.4434285163879395, 9.973138809204102, 5.036680221557617, 2.005120277404785, 2.861560344696045, 5.860223770141602, 2.917618751525879, -1.63111412525177, 2.0292205810546875, -4.070415019989014, -6.831437110900879]}} ``` #### 7.2 Get the score between speaker audio embedding @@ -331,12 +331,12 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee Output: ``` bash - [2022-05-09 10:28:40,556] [ INFO] - vector score http client start - [2022-05-09 10:28:40,556] [ INFO] - enroll audio: 85236145389.wav, test audio: 123456789.wav - [2022-05-09 10:28:40,556] [ INFO] - endpoint: http://127.0.0.1:8090/paddlespeech/vector/score - [2022-05-09 10:28:40,731] [ INFO] - The vector score is: {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'score': 0.4292638897895813}} - [2022-05-09 10:28:40,731] [ INFO] - The vector: None - [2022-05-09 10:28:40,731] [ INFO] - Response time 0.175514 s. + [2022-05-25 12:33:24,527] [ INFO] - vector score http client start + [2022-05-25 12:33:24,527] [ INFO] - enroll audio: 85236145389.wav, test audio: 123456789.wav + [2022-05-25 12:33:24,528] [ INFO] - endpoint: http://127.0.0.1:8790/paddlespeech/vector/score + [2022-05-25 12:33:24,695] [ INFO] - The vector score is: {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'score': 0.45332613587379456}} + [2022-05-25 12:33:24,696] [ INFO] - The vector: {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'score': 0.45332613587379456}} + [2022-05-25 12:33:24,696] [ INFO] - Response time 0.168271 s. 
``` * Python API @@ -358,10 +358,11 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee Output: ``` bash - [2022-05-09 10:34:54,769] [ INFO] - vector score http client start - [2022-05-09 10:34:54,771] [ INFO] - enroll audio: 85236145389.wav, test audio: 123456789.wav - [2022-05-09 10:34:54,771] [ INFO] - endpoint: http://127.0.0.1:8090/paddlespeech/vector/score - [2022-05-09 10:34:55,026] [ INFO] - The vector score is: {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'score': 0.4292638897895813}} + [2022-05-25 12:30:14,143] [ INFO] - vector score http client start + [2022-05-25 12:30:14,143] [ INFO] - enroll audio: 85236145389.wav, test audio: 123456789.wav + [2022-05-25 12:30:14,143] [ INFO] - endpoint: http://127.0.0.1:8790/paddlespeech/vector/score + [2022-05-25 12:30:14,363] [ INFO] - The vector score is: {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'score': 0.45332613587379456}} + {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'score': 0.45332613587379456}} ``` ### 8. 
Punctuation prediction diff --git a/demos/speech_server/README_cn.md b/demos/speech_server/README_cn.md index 4895b182b..8c95a989b 100644 --- a/demos/speech_server/README_cn.md +++ b/demos/speech_server/README_cn.md @@ -277,12 +277,12 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee 输出: ``` bash - [2022-05-08 00:18:44,249] [ INFO] - vector http client start - [2022-05-08 00:18:44,250] [ INFO] - the input audio: 85236145389.wav - [2022-05-08 00:18:44,250] [ INFO] - endpoint: http://127.0.0.1:8090/paddlespeech/vector - [2022-05-08 00:18:44,250] [ INFO] - http://127.0.0.1:8590/paddlespeech/vector - [2022-05-08 00:18:44,406] [ INFO] - The vector: {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'vec': [1.421751856803894, 5.626245498657227, -5.342077255249023, 1.1773887872695923, 3.3080549240112305, 1.7565933465957642, 5.167886257171631, 10.806358337402344, -3.8226819038391113, -5.614140033721924, 2.6238479614257812, -0.8072972893714905, 1.9635076522827148, -7.312870025634766, 0.011035939678549767, -9.723129272460938, 0.6619706153869629, -6.976806163787842, 10.213476181030273, 7.494769096374512, 2.9105682373046875, 3.8949244022369385, 3.799983501434326, 7.106168746948242, 16.90532875061035, -7.149388313293457, 8.733108520507812, 3.423006296157837, -4.831653594970703, -11.403363227844238, 11.232224464416504, 7.127461910247803, -4.282842636108398, 2.452359437942505, -5.130749702453613, -18.17766761779785, -2.6116831302642822, -11.000344276428223, -6.731433391571045, 1.6564682722091675, 0.7618281245231628, 1.125300407409668, -2.0838370323181152, 4.725743293762207, -8.782588005065918, -3.5398752689361572, 3.8142364025115967, 5.142068862915039, 2.1620609760284424, 4.09643030166626, -6.416214942932129, 12.747446060180664, 1.9429892301559448, -15.15294361114502, 6.417416095733643, 16.09701156616211, -9.716667175292969, -1.9920575618743896, -3.36494779586792, -1.8719440698623657, 11.567351341247559, 
3.6978814601898193, 11.258262634277344, 7.442368507385254, 9.183408737182617, 4.528149127960205, -1.2417854070663452, 4.395912170410156, 6.6727728843688965, 5.88988733291626, 7.627128601074219, -0.6691966652870178, -11.889698028564453, -9.20886516571045, -7.42740535736084, -3.777663230895996, 6.917238712310791, -9.848755836486816, -2.0944676399230957, -5.1351165771484375, 0.4956451654434204, 9.317537307739258, -5.914181232452393, -1.809860348701477, -0.11738915741443634, -7.1692705154418945, -1.057827353477478, -5.721670627593994, -5.117385387420654, 16.13765525817871, -4.473617076873779, 7.6624321937561035, -0.55381840467453, 9.631585121154785, -6.470459461212158, -8.548508644104004, 4.371616840362549, -0.7970245480537415, 4.4789886474609375, -2.975860834121704, 3.2721822261810303, 2.838287830352783, 5.134591102600098, -9.19079875946045, -0.5657302737236023, -4.8745832443237305, 2.3165574073791504, -5.984319686889648, -2.1798853874206543, 0.3554139733314514, -0.3178512752056122, 9.493552207946777, 2.1144471168518066, 4.358094692230225, -12.089824676513672, 8.451693534851074, -7.925466537475586, 4.624246597290039, 4.428936958312988, 18.69200897216797, -2.6204581260681152, -5.14918851852417, -0.3582090139389038, 8.488558769226074, 4.98148775100708, -9.326835632324219, -2.2544219493865967, 6.641760349273682, 1.2119598388671875, 10.977124214172363, 16.555034637451172, 3.3238420486450195, 9.551861763000488, -1.6676981449127197, -0.7953944206237793, -8.605667114257812, -0.4735655188560486, 2.674196243286133, -5.359177112579346, -2.66738224029541, 0.6660683155059814, 15.44322681427002, 4.740593433380127, -3.472534418106079, 11.592567443847656, -2.0544962882995605, 1.736127495765686, -8.265326499938965, -9.30447769165039, 5.406829833984375, -1.518022894859314, -7.746612548828125, -6.089611053466797, 0.07112743705511093, -0.3490503430366516, -8.64989185333252, -9.998957633972168, -2.564845085144043, -0.5399947762489319, 2.6018123626708984, -0.3192799389362335, 
-1.8815255165100098, -2.0721492767333984, -3.410574436187744, -8.29980754852295, 1.483638048171997, -15.365986824035645, -8.288211822509766, 3.884779930114746, -3.4876468181610107, 7.362999439239502, 0.4657334089279175, 3.1326050758361816, 12.438895225524902, -1.8337041139602661, 4.532927989959717, 2.7264339923858643, 10.14534854888916, -6.521963596343994, 2.897155523300171, -3.392582654953003, 5.079153060913086, 7.7597246170043945, 4.677570819854736, 5.845779895782471, 2.402411460876465, 7.7071051597595215, 3.9711380004882812, -6.39003849029541, 6.12687873840332, -3.776029348373413, -11.118121147155762]}} - [2022-05-08 00:18:44,406] [ INFO] - Response time 0.156481 s. + [2022-05-25 12:25:36,165] [ INFO] - vector http client start + [2022-05-25 12:25:36,165] [ INFO] - the input audio: 85236145389.wav + [2022-05-25 12:25:36,165] [ INFO] - endpoint: http://127.0.0.1:8790/paddlespeech/vector + [2022-05-25 12:25:36,166] [ INFO] - http://127.0.0.1:8790/paddlespeech/vector + [2022-05-25 12:25:36,324] [ INFO] - The vector: {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'vec': [-1.3251205682754517, 7.860682487487793, -4.620625972747803, 0.3000721037387848, 2.2648534774780273, -1.1931440830230713, 3.064713716506958, 7.673594951629639, -6.004472732543945, -12.024259567260742, -1.9496068954467773, 3.126953601837158, 1.6188379526138306, -7.638310432434082, -1.2299772500991821, -12.33833122253418, 2.1373026371002197, -5.395712375640869, 9.717328071594238, 5.675230503082275, 3.7805123329162598, 3.0597171783447266, 3.429692029953003, 8.9760103225708, 13.174124717712402, -0.5313228368759155, 8.942471504211426, 4.465109825134277, -4.426247596740723, -9.726503372192383, 8.399328231811523, 7.223917484283447, -7.435853958129883, 2.9441683292388916, -4.343039512634277, -13.886964797973633, -1.6346734762191772, -10.902740478515625, -5.311244964599609, 3.800722122192383, 3.897603750228882, -2.123077392578125, -2.3521194458007812, 4.151031017303467, 
-7.404866695404053, 0.13911646604537964, 2.4626107215881348, 4.96645450592041, 0.9897574186325073, 5.483975410461426, -3.3574001789093018, 10.13400650024414, -0.6120170950889587, -10.403095245361328, 4.600754261016846, 16.009349822998047, -7.78369140625, -4.194530487060547, -6.93686056137085, 1.1789555549621582, 11.490800857543945, 4.23802375793457, 9.550930976867676, 8.375045776367188, 7.508914470672607, -0.6570729613304138, -0.3005157709121704, 2.8406054973602295, 3.0828027725219727, 0.7308170199394226, 6.1483540534973145, 0.1376611888408661, -13.424735069274902, -7.746140480041504, -2.322798252105713, -8.305252075195312, 2.98791241645813, -10.99522876739502, 0.15211068093776703, -2.3820347785949707, -1.7984174489974976, 8.49562931060791, -5.852236747741699, -3.755497932434082, 0.6989710927009583, -5.270299434661865, -2.6188621520996094, -1.8828465938568115, -4.6466498374938965, 14.078543663024902, -0.5495333075523376, 10.579157829284668, -3.216050148010254, 9.349003791809082, -4.381077766418457, -11.675816535949707, -2.863020658493042, 4.5721755027771, 2.246612071990967, -4.574341773986816, 1.8610187768936157, 2.3767874240875244, 5.625787734985352, -9.784077644348145, 0.6496725678443909, -1.457950472831726, 0.4263263940811157, -4.921126365661621, -2.4547839164733887, 3.4869801998138428, -0.4265422224998474, 8.341268539428711, 1.356552004814148, 7.096688270568848, -13.102828979492188, 8.01673412322998, -7.115934371948242, 1.8699780702590942, 0.20872099697589874, 14.699383735656738, -1.0252779722213745, -2.6107232570648193, -2.5082311630249023, 8.427192687988281, 6.913852691650391, -6.29124641418457, 0.6157366037368774, 2.489687919616699, -3.4668266773223877, 9.92176342010498, 11.200815200805664, -0.19664029777050018, 7.491600513458252, -0.6231271624565125, -0.2584814429283142, -9.947997093200684, -0.9611040949821472, 1.1649218797683716, -2.1907122135162354, -1.502848744392395, -0.5192610621452332, 15.165953636169434, 2.4649462699890137, -0.998044490814209, 
7.44166374206543, -2.0768048763275146, 3.5896823406219482, -7.305543422698975, -7.562084674835205, 4.32333517074585, 0.08044180274009705, -6.564010143280029, -2.314805269241333, -1.7642345428466797, -2.470881700515747, -7.6756181716918945, -9.548877716064453, -1.017755389213562, 0.1698644608259201, 2.5877134799957275, -1.8752295970916748, -0.36614322662353516, -6.049378395080566, -2.3965611457824707, -5.945338726043701, 0.9424033164978027, -13.155974388122559, -7.45780086517334, 0.14658108353614807, -3.7427968978881836, 5.841492652893066, -1.2872905731201172, 5.569431304931641, 12.570590019226074, 1.0939218997955322, 2.2142086029052734, 1.9181575775146484, 6.991420745849609, -5.888138771057129, 3.1409823894500732, -2.0036280155181885, 2.4434285163879395, 9.973138809204102, 5.036680221557617, 2.005120277404785, 2.861560344696045, 5.860223770141602, 2.917618751525879, -1.63111412525177, 2.0292205810546875, -4.070415019989014, -6.831437110900879]}} + [2022-05-25 12:25:36,324] [ INFO] - Response time 0.159053 s. 
``` * Python API @@ -302,7 +302,7 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee 输出: ``` bash - {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'vec': [1.421751856803894, 5.626245498657227, -5.342077255249023, 1.1773887872695923, 3.3080549240112305, 1.7565933465957642, 5.167886257171631, 10.806358337402344, -3.8226819038391113, -5.614140033721924, 2.6238479614257812, -0.8072972893714905, 1.9635076522827148, -7.312870025634766, 0.011035939678549767, -9.723129272460938, 0.6619706153869629, -6.976806163787842, 10.213476181030273, 7.494769096374512, 2.9105682373046875, 3.8949244022369385, 3.799983501434326, 7.106168746948242, 16.90532875061035, -7.149388313293457, 8.733108520507812, 3.423006296157837, -4.831653594970703, -11.403363227844238, 11.232224464416504, 7.127461910247803, -4.282842636108398, 2.452359437942505, -5.130749702453613, -18.17766761779785, -2.6116831302642822, -11.000344276428223, -6.731433391571045, 1.6564682722091675, 0.7618281245231628, 1.125300407409668, -2.0838370323181152, 4.725743293762207, -8.782588005065918, -3.5398752689361572, 3.8142364025115967, 5.142068862915039, 2.1620609760284424, 4.09643030166626, -6.416214942932129, 12.747446060180664, 1.9429892301559448, -15.15294361114502, 6.417416095733643, 16.09701156616211, -9.716667175292969, -1.9920575618743896, -3.36494779586792, -1.8719440698623657, 11.567351341247559, 3.6978814601898193, 11.258262634277344, 7.442368507385254, 9.183408737182617, 4.528149127960205, -1.2417854070663452, 4.395912170410156, 6.6727728843688965, 5.88988733291626, 7.627128601074219, -0.6691966652870178, -11.889698028564453, -9.20886516571045, -7.42740535736084, -3.777663230895996, 6.917238712310791, -9.848755836486816, -2.0944676399230957, -5.1351165771484375, 0.4956451654434204, 9.317537307739258, -5.914181232452393, -1.809860348701477, -0.11738915741443634, -7.1692705154418945, -1.057827353477478, -5.721670627593994, -5.117385387420654, 
16.13765525817871, -4.473617076873779, 7.6624321937561035, -0.55381840467453, 9.631585121154785, -6.470459461212158, -8.548508644104004, 4.371616840362549, -0.7970245480537415, 4.4789886474609375, -2.975860834121704, 3.2721822261810303, 2.838287830352783, 5.134591102600098, -9.19079875946045, -0.5657302737236023, -4.8745832443237305, 2.3165574073791504, -5.984319686889648, -2.1798853874206543, 0.3554139733314514, -0.3178512752056122, 9.493552207946777, 2.1144471168518066, 4.358094692230225, -12.089824676513672, 8.451693534851074, -7.925466537475586, 4.624246597290039, 4.428936958312988, 18.69200897216797, -2.6204581260681152, -5.14918851852417, -0.3582090139389038, 8.488558769226074, 4.98148775100708, -9.326835632324219, -2.2544219493865967, 6.641760349273682, 1.2119598388671875, 10.977124214172363, 16.555034637451172, 3.3238420486450195, 9.551861763000488, -1.6676981449127197, -0.7953944206237793, -8.605667114257812, -0.4735655188560486, 2.674196243286133, -5.359177112579346, -2.66738224029541, 0.6660683155059814, 15.44322681427002, 4.740593433380127, -3.472534418106079, 11.592567443847656, -2.0544962882995605, 1.736127495765686, -8.265326499938965, -9.30447769165039, 5.406829833984375, -1.518022894859314, -7.746612548828125, -6.089611053466797, 0.07112743705511093, -0.3490503430366516, -8.64989185333252, -9.998957633972168, -2.564845085144043, -0.5399947762489319, 2.6018123626708984, -0.3192799389362335, -1.8815255165100098, -2.0721492767333984, -3.410574436187744, -8.29980754852295, 1.483638048171997, -15.365986824035645, -8.288211822509766, 3.884779930114746, -3.4876468181610107, 7.362999439239502, 0.4657334089279175, 3.1326050758361816, 12.438895225524902, -1.8337041139602661, 4.532927989959717, 2.7264339923858643, 10.14534854888916, -6.521963596343994, 2.897155523300171, -3.392582654953003, 5.079153060913086, 7.7597246170043945, 4.677570819854736, 5.845779895782471, 2.402411460876465, 7.7071051597595215, 3.9711380004882812, -6.39003849029541, 
6.12687873840332, -3.776029348373413, -11.118121147155762]}} + {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'vec': [-1.3251205682754517, 7.860682487487793, -4.620625972747803, 0.3000721037387848, 2.2648534774780273, -1.1931440830230713, 3.064713716506958, 7.673594951629639, -6.004472732543945, -12.024259567260742, -1.9496068954467773, 3.126953601837158, 1.6188379526138306, -7.638310432434082, -1.2299772500991821, -12.33833122253418, 2.1373026371002197, -5.395712375640869, 9.717328071594238, 5.675230503082275, 3.7805123329162598, 3.0597171783447266, 3.429692029953003, 8.9760103225708, 13.174124717712402, -0.5313228368759155, 8.942471504211426, 4.465109825134277, -4.426247596740723, -9.726503372192383, 8.399328231811523, 7.223917484283447, -7.435853958129883, 2.9441683292388916, -4.343039512634277, -13.886964797973633, -1.6346734762191772, -10.902740478515625, -5.311244964599609, 3.800722122192383, 3.897603750228882, -2.123077392578125, -2.3521194458007812, 4.151031017303467, -7.404866695404053, 0.13911646604537964, 2.4626107215881348, 4.96645450592041, 0.9897574186325073, 5.483975410461426, -3.3574001789093018, 10.13400650024414, -0.6120170950889587, -10.403095245361328, 4.600754261016846, 16.009349822998047, -7.78369140625, -4.194530487060547, -6.93686056137085, 1.1789555549621582, 11.490800857543945, 4.23802375793457, 9.550930976867676, 8.375045776367188, 7.508914470672607, -0.6570729613304138, -0.3005157709121704, 2.8406054973602295, 3.0828027725219727, 0.7308170199394226, 6.1483540534973145, 0.1376611888408661, -13.424735069274902, -7.746140480041504, -2.322798252105713, -8.305252075195312, 2.98791241645813, -10.99522876739502, 0.15211068093776703, -2.3820347785949707, -1.7984174489974976, 8.49562931060791, -5.852236747741699, -3.755497932434082, 0.6989710927009583, -5.270299434661865, -2.6188621520996094, -1.8828465938568115, -4.6466498374938965, 14.078543663024902, -0.5495333075523376, 10.579157829284668, -3.216050148010254, 
9.349003791809082, -4.381077766418457, -11.675816535949707, -2.863020658493042, 4.5721755027771, 2.246612071990967, -4.574341773986816, 1.8610187768936157, 2.3767874240875244, 5.625787734985352, -9.784077644348145, 0.6496725678443909, -1.457950472831726, 0.4263263940811157, -4.921126365661621, -2.4547839164733887, 3.4869801998138428, -0.4265422224998474, 8.341268539428711, 1.356552004814148, 7.096688270568848, -13.102828979492188, 8.01673412322998, -7.115934371948242, 1.8699780702590942, 0.20872099697589874, 14.699383735656738, -1.0252779722213745, -2.6107232570648193, -2.5082311630249023, 8.427192687988281, 6.913852691650391, -6.29124641418457, 0.6157366037368774, 2.489687919616699, -3.4668266773223877, 9.92176342010498, 11.200815200805664, -0.19664029777050018, 7.491600513458252, -0.6231271624565125, -0.2584814429283142, -9.947997093200684, -0.9611040949821472, 1.1649218797683716, -2.1907122135162354, -1.502848744392395, -0.5192610621452332, 15.165953636169434, 2.4649462699890137, -0.998044490814209, 7.44166374206543, -2.0768048763275146, 3.5896823406219482, -7.305543422698975, -7.562084674835205, 4.32333517074585, 0.08044180274009705, -6.564010143280029, -2.314805269241333, -1.7642345428466797, -2.470881700515747, -7.6756181716918945, -9.548877716064453, -1.017755389213562, 0.1698644608259201, 2.5877134799957275, -1.8752295970916748, -0.36614322662353516, -6.049378395080566, -2.3965611457824707, -5.945338726043701, 0.9424033164978027, -13.155974388122559, -7.45780086517334, 0.14658108353614807, -3.7427968978881836, 5.841492652893066, -1.2872905731201172, 5.569431304931641, 12.570590019226074, 1.0939218997955322, 2.2142086029052734, 1.9181575775146484, 6.991420745849609, -5.888138771057129, 3.1409823894500732, -2.0036280155181885, 2.4434285163879395, 9.973138809204102, 5.036680221557617, 2.005120277404785, 2.861560344696045, 5.860223770141602, 2.917618751525879, -1.63111412525177, 2.0292205810546875, -4.070415019989014, -6.831437110900879]}} ``` #### 7.2 音频声纹打分 
@@ -333,12 +333,12 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee 输出: ``` bash - [2022-05-09 10:28:40,556] [ INFO] - vector score http client start - [2022-05-09 10:28:40,556] [ INFO] - enroll audio: 85236145389.wav, test audio: 123456789.wav - [2022-05-09 10:28:40,556] [ INFO] - endpoint: http://127.0.0.1:8090/paddlespeech/vector/score - [2022-05-09 10:28:40,731] [ INFO] - The vector score is: {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'score': 0.4292638897895813}} - [2022-05-09 10:28:40,731] [ INFO] - The vector: None - [2022-05-09 10:28:40,731] [ INFO] - Response time 0.175514 s. + [2022-05-25 12:33:24,527] [ INFO] - vector score http client start + [2022-05-25 12:33:24,527] [ INFO] - enroll audio: 85236145389.wav, test audio: 123456789.wav + [2022-05-25 12:33:24,528] [ INFO] - endpoint: http://127.0.0.1:8790/paddlespeech/vector/score + [2022-05-25 12:33:24,695] [ INFO] - The vector score is: {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'score': 0.45332613587379456}} + [2022-05-25 12:33:24,696] [ INFO] - The vector: {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'score': 0.45332613587379456}} + [2022-05-25 12:33:24,696] [ INFO] - Response time 0.168271 s. 
``` * Python API @@ -352,7 +352,7 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee enroll_audio="85236145389.wav", test_audio="123456789.wav", server_ip="127.0.0.1", - port=8090, + port=8790, task="score") print(res) ``` @@ -360,10 +360,11 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee 输出: ``` bash - [2022-05-09 10:34:54,769] [ INFO] - vector score http client start - [2022-05-09 10:34:54,771] [ INFO] - enroll audio: 85236145389.wav, test audio: 123456789.wav - [2022-05-09 10:34:54,771] [ INFO] - endpoint: http://127.0.0.1:8590/paddlespeech/vector/score - [2022-05-09 10:34:55,026] [ INFO] - The vector score is: {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'score': 0.4292638897895813}} + [2022-05-25 12:30:14,143] [ INFO] - vector score http client start + [2022-05-25 12:30:14,143] [ INFO] - enroll audio: 85236145389.wav, test audio: 123456789.wav + [2022-05-25 12:30:14,143] [ INFO] - endpoint: http://127.0.0.1:8790/paddlespeech/vector/score + [2022-05-25 12:30:14,363] [ INFO] - The vector score is: {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'score': 0.45332613587379456}} + {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'score': 0.45332613587379456}} ``` diff --git a/paddlespeech/server/tests/vector/http_client.py b/paddlespeech/server/tests/vector/http_client.py deleted file mode 100644 index 49f2adf7c..000000000 --- a/paddlespeech/server/tests/vector/http_client.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the -import base64 -import json -import time - -import requests - - -def readwav2base64(wav_file): - """ - read wave file and covert to base64 string - """ - with open(wav_file, 'rb') as f: - base64_bytes = base64.b64encode(f.read()) - base64_string = base64_bytes.decode('utf-8') - return base64_string - - -def main(): - """ - main func - """ - url = "http://127.0.0.1:8090/paddlespeech/asr" - - # start Timestamp - time_start = time.time() - - test_audio_dir = "./16_audio.wav" - audio = readwav2base64(test_audio_dir) - - data = { - "audio": audio, - "audio_format": "wav", - "sample_rate": 16000, - "lang": "zh_cn", - } - - r = requests.post(url=url, data=json.dumps(data)) - - # ending Timestamp - time_end = time.time() - print('time cost', time_end - time_start, 's') - - print(r.json()) - - -if __name__ == "__main__": - main() From a5605978fad194e6877f6d74349c3d8ee2d89e54 Mon Sep 17 00:00:00 2001 From: xiongxinlei Date: Wed, 25 May 2022 13:07:56 +0800 Subject: [PATCH 3/5] update the acs note, test=doc --- paddlespeech/server/bin/paddlespeech_client.py | 1 + paddlespeech/server/engine/acs/python/acs_engine.py | 6 ------ 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/paddlespeech/server/bin/paddlespeech_client.py b/paddlespeech/server/bin/paddlespeech_client.py index 74e7ce3fe..fb521b309 100644 --- a/paddlespeech/server/bin/paddlespeech_client.py +++ b/paddlespeech/server/bin/paddlespeech_client.py @@ -752,6 +752,7 @@ class VectorClientExecutor(BaseExecutor): res = handler.run(enroll_audio, test_audio, audio_format, sample_rate) logger.info(f"The vector score is: {res}") + return res else: logger.error(f"Sorry, we have not 
support such task {task}") diff --git a/paddlespeech/server/engine/acs/python/acs_engine.py b/paddlespeech/server/engine/acs/python/acs_engine.py index d52852dcf..3eb47e86d 100644 --- a/paddlespeech/server/engine/acs/python/acs_engine.py +++ b/paddlespeech/server/engine/acs/python/acs_engine.py @@ -118,12 +118,6 @@ class ACSEngine(BaseEngine): msg = ws.recv() msg = json.loads(msg) logger.info(f"audio result: {msg}") - # samples, sample_rate = soundfile.read(audio_data, dtype='int16') - - # ws.send_binary(samples.tobytes()) - # msg = ws.recv() - # msg = json.loads(msg) - # logger.info(f"audio result: {msg}") # 3. send chunk audio data to engine logger.info("send the end signal") From be8a78a9d1a70e771f841adb5453e91ca19d5966 Mon Sep 17 00:00:00 2001 From: xiongxinlei Date: Wed, 25 May 2022 14:22:38 +0800 Subject: [PATCH 4/5] fix the vector model type error, test=doc --- demos/audio_content_search/conf/acs_application.yaml | 1 + docs/source/released_model.md | 2 +- paddlespeech/server/engine/acs/python/acs_engine.py | 7 +++++-- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/demos/audio_content_search/conf/acs_application.yaml b/demos/audio_content_search/conf/acs_application.yaml index d3c5e3039..dbddd06fb 100644 --- a/demos/audio_content_search/conf/acs_application.yaml +++ b/demos/audio_content_search/conf/acs_application.yaml @@ -28,6 +28,7 @@ acs_python: word_list: "./conf/words.txt" sample_rate: 16000 device: 'cpu' # set 'gpu:id' or 'cpu' + ping_timeout: 100 # seconds diff --git a/docs/source/released_model.md b/docs/source/released_model.md index 3231fecd4..67e7b62e1 100644 --- a/docs/source/released_model.md +++ b/docs/source/released_model.md @@ -82,7 +82,7 @@ PANN | ESC-50 |[pann-esc50](../../examples/esc50/cls0)|[esc50_cnn6.tar.gz](https Model Type | Dataset| Example Link | Pretrained Models | Static Models :-------------:| :------------:| :-----: | :-----: | :-----: -PANN | VoxCeleb| 
[voxceleb_ecapatdnn](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/voxceleb/sv0) | [ecapatdnn.tar.gz](https://paddlespeech.bj.bcebos.com/vector/voxceleb/sv0_ecapa_tdnn_voxceleb12_ckpt_0_2_1.tar.gz) | - +ECAPA-TDNN | VoxCeleb| [voxceleb_ecapatdnn](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/voxceleb/sv0) | [ecapatdnn.tar.gz](https://paddlespeech.bj.bcebos.com/vector/voxceleb/sv0_ecapa_tdnn_voxceleb12_ckpt_0_2_1.tar.gz) | - ## Punctuation Restoration Models Model Type | Dataset| Example Link | Pretrained Models diff --git a/paddlespeech/server/engine/acs/python/acs_engine.py b/paddlespeech/server/engine/acs/python/acs_engine.py index 3eb47e86d..930101ac9 100644 --- a/paddlespeech/server/engine/acs/python/acs_engine.py +++ b/paddlespeech/server/engine/acs/python/acs_engine.py @@ -46,6 +46,9 @@ class ACSEngine(BaseEngine): try: self.config = config self.device = self.config.get("device", paddle.get_device()) + + # websocket default ping timeout is 20 seconds + self.ping_timeout = self.config.get("ping_timeout", 20) paddle.set_device(self.device) logger.info(f"ACS Engine set the device: {self.device}") @@ -97,8 +100,8 @@ class ACSEngine(BaseEngine): logger.error("No asr server, please input valid ip and port") return "" ws = websocket.WebSocket() - ws.connect(self.url) - # with websocket.WebSocket.connect(self.url) as ws: + logger.info(f"set the ping timeout: {self.ping_timeout} seconds") + ws.connect(self.url, ping_timeout=self.ping_timeout) audio_info = json.dumps( { "name": "test.wav", From 07c0d7d7cc265ba191b002e6e62a40dccb1f55ff Mon Sep 17 00:00:00 2001 From: xiongxinlei Date: Wed, 25 May 2022 14:25:43 +0800 Subject: [PATCH 5/5] remove old vector model info, test=doc --- demos/audio_content_search/README.md | 7 ++++++- demos/audio_content_search/README_cn.md | 6 +++++- demos/speech_server/README_cn.md | 2 +- examples/voxceleb/sv0/RESULT.md | 1 - 4 files changed, 12 insertions(+), 4 deletions(-) diff --git 
a/demos/audio_content_search/README.md b/demos/audio_content_search/README.md index d73d6a59d..4428bf389 100644 --- a/demos/audio_content_search/README.md +++ b/demos/audio_content_search/README.md @@ -16,7 +16,12 @@ see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/doc You can choose one way from meduim and hard to install paddlespeech. -The dependency refers to the requirements.txt +The dependency refers to the requirements.txt, and install the dependency as follows: + +``` +pip install -r requirements.txt +``` + ### 2. Prepare Input File The input of this demo should be a WAV file(`.wav`), and the sample rate must be the same as the model. diff --git a/demos/audio_content_search/README_cn.md b/demos/audio_content_search/README_cn.md index c74af4cf1..6f51c4cf2 100644 --- a/demos/audio_content_search/README_cn.md +++ b/demos/audio_content_search/README_cn.md @@ -16,7 +16,11 @@ 请看[安装文档](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install_cn.md)。 你可以从 medium,hard 三中方式中选择一种方式安装。 -依赖参见 requirements.txt +依赖参见 requirements.txt, 安装依赖 + +``` +pip install -r requirements.txt +``` ### 2.
准备输入 这个 demo 的输入应该是一个 WAV 文件(`.wav`),并且采样率必须与模型的采样率相同。 diff --git a/demos/speech_server/README_cn.md b/demos/speech_server/README_cn.md index 8c95a989b..29629b7e8 100644 --- a/demos/speech_server/README_cn.md +++ b/demos/speech_server/README_cn.md @@ -352,7 +352,7 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee enroll_audio="85236145389.wav", test_audio="123456789.wav", server_ip="127.0.0.1", - port=8790, + port=8090, task="score") print(res) ``` diff --git a/examples/voxceleb/sv0/RESULT.md b/examples/voxceleb/sv0/RESULT.md index a1d2a1812..56ee887c6 100644 --- a/examples/voxceleb/sv0/RESULT.md +++ b/examples/voxceleb/sv0/RESULT.md @@ -4,5 +4,4 @@ | Model | Number of Params | Release | Config | dim | Test set | Cosine | Cosine + S-Norm | | --- | --- | --- | --- | --- | --- | --- | ---- | -| ECAPA-TDNN | 85M | 0.2.0 | conf/ecapa_tdnn.yaml |192 | test | 1.02 | 0.95 | | ECAPA-TDNN | 85M | 0.2.1 | conf/ecapa_tdnn.yaml | 192 | test | 0.8188 | 0.7815|