change the vector output to numpy.array

pull/1614/head
xiongxinlei 3 years ago
parent 5ae57206f3
commit ed7113f320

@ -66,10 +66,12 @@ TRIAL_LIST = {
"veri_test.txt": "29fc7cc1c5d59f0816dc15d6e8be60f7", # voxceleb1 "veri_test.txt": "29fc7cc1c5d59f0816dc15d6e8be60f7", # voxceleb1
"veri_test2.txt": "b73110731c9223c1461fe49cb48dddfc", # voxceleb1(cleaned) "veri_test2.txt": "b73110731c9223c1461fe49cb48dddfc", # voxceleb1(cleaned)
"list_test_hard.txt": "21c341b6b2168eea2634df0fb4b8fff1", # voxceleb1-H "list_test_hard.txt": "21c341b6b2168eea2634df0fb4b8fff1", # voxceleb1-H
"list_test_hard2.txt": "857790e09d579a68eb2e339a090343c8", # voxceleb1-H(cleaned) "list_test_hard2.txt":
"857790e09d579a68eb2e339a090343c8", # voxceleb1-H(cleaned)
"list_test_all.txt": "b9ecf7aa49d4b656aa927a8092844e4a", # voxceleb1-E "list_test_all.txt": "b9ecf7aa49d4b656aa927a8092844e4a", # voxceleb1-E
"list_test_all2.txt": "a53e059deb562ffcfc092bf5d90d9f3a" # voxceleb1-E(cleaned) "list_test_all2.txt":
} "a53e059deb562ffcfc092bf5d90d9f3a" # voxceleb1-E(cleaned)
}
parser = argparse.ArgumentParser(description=__doc__) parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument( parser.add_argument(
@ -176,6 +178,7 @@ def prepare_dataset(base_url, data_list, target_dir, manifest_path,
# create the manifest file # create the manifest file
create_manifest(data_dir=target_dir, manifest_path_prefix=manifest_path) create_manifest(data_dir=target_dir, manifest_path_prefix=manifest_path)
def prepare_trial(base_url, data_list, target_dir): def prepare_trial(base_url, data_list, target_dir):
if not os.path.exists(target_dir): if not os.path.exists(target_dir):
os.makedirs(target_dir) os.makedirs(target_dir)
@ -185,6 +188,8 @@ def prepare_trial(base_url, data_list, target_dir):
if not os.path.exists(os.path.join(target_dir, trial)): if not os.path.exists(os.path.join(target_dir, trial)):
download_url = " --no-check-certificate " + base_url + "/" + trial download_url = " --no-check-certificate " + base_url + "/" + trial
download(url=download_url, md5sum=md5sum, target_dir=target_dir) download(url=download_url, md5sum=md5sum, target_dir=target_dir)
def main(): def main():
if args.target_dir.startswith('~'): if args.target_dir.startswith('~'):
args.target_dir = os.path.expanduser(args.target_dir) args.target_dir = os.path.expanduser(args.target_dir)
@ -209,8 +214,7 @@ def main():
prepare_trial( prepare_trial(
base_url=TRIAL_BASE_URL, base_url=TRIAL_BASE_URL,
data_list=TRIAL_LIST, data_list=TRIAL_LIST,
target_dir=os.path.dirname(args.manifest_prefix) target_dir=os.path.dirname(args.manifest_prefix))
)
print("Manifest prepare done!") print("Manifest prepare done!")

@ -22,12 +22,10 @@ import codecs
import glob import glob
import json import json
import os import os
import subprocess
from pathlib import Path from pathlib import Path
import soundfile import soundfile
from utils.utility import check_md5sum
from utils.utility import download from utils.utility import download
from utils.utility import unzip from utils.utility import unzip
@ -40,7 +38,6 @@ BASE_URL = "--no-check-certificate https://www.robots.ox.ac.uk/~vgg/data/voxcele
DEV_DATA_URL = BASE_URL + '/vox2_aac.zip' DEV_DATA_URL = BASE_URL + '/vox2_aac.zip'
DEV_MD5SUM = "bbc063c46078a602ca71605645c2a402" DEV_MD5SUM = "bbc063c46078a602ca71605645c2a402"
# test data # test data
TEST_DATA_URL = BASE_URL + '/vox2_test_aac.zip' TEST_DATA_URL = BASE_URL + '/vox2_test_aac.zip'
TEST_MD5SUM = "0d2b3ea430a821c33263b5ea37ede312" TEST_MD5SUM = "0d2b3ea430a821c33263b5ea37ede312"
@ -56,11 +53,13 @@ parser.add_argument(
default="manifest", default="manifest",
type=str, type=str,
help="Filepath prefix for output manifests. (default: %(default)s)") help="Filepath prefix for output manifests. (default: %(default)s)")
parser.add_argument("--download", parser.add_argument(
"--download",
default=False, default=False,
action="store_true", action="store_true",
help="Download the voxceleb2 dataset. (default: %(default)s)") help="Download the voxceleb2 dataset. (default: %(default)s)")
parser.add_argument("--generate", parser.add_argument(
"--generate",
default=False, default=False,
action="store_true", action="store_true",
help="Generate the manifest files. (default: %(default)s)") help="Generate the manifest files. (default: %(default)s)")
@ -157,7 +156,9 @@ def main():
print("VoxCeleb2 download is done!") print("VoxCeleb2 download is done!")
if args.generate: if args.generate:
create_manifest(args.target_dir, manifest_path_prefix=args.manifest_prefix) create_manifest(
args.target_dir, manifest_path_prefix=args.manifest_prefix)
if __name__ == '__main__': if __name__ == '__main__':
main() main()

@ -26,7 +26,8 @@ def get_audios(path):
""" """
supported_formats = [".wav", ".mp3", ".ogg", ".flac", ".m4a"] supported_formats = [".wav", ".mp3", ".ogg", ".flac", ".m4a"]
return [ return [
item for sublist in [[os.path.join(dir, file) for file in files] item
for sublist in [[os.path.join(dir, file) for file in files]
for dir, _, files in list(os.walk(path))] for dir, _, files in list(os.walk(path))]
for item in sublist if os.path.splitext(item)[1] in supported_formats for item in sublist if os.path.splitext(item)[1] in supported_formats
] ]

@ -46,56 +46,46 @@ wget -c https://paddlespeech.bj.bcebos.com/vector/audio/85236145389.wav
Output: Output:
```bash ```bash
demo {'dim': 192, 'embedding': array([ -5.749211 , 9.505463 , -8.200284 , -5.2075014 , demo [ -5.749211 9.505463 -8.200284 -5.2075014 5.3940268
5.3940268 , -3.04878 , 1.611095 , 10.127234 , -3.04878 1.611095 10.127234 -10.534177 -15.821609
-10.534177 , -15.821609 , 1.2032688 , -0.35080156, 1.2032688 -0.35080156 1.2629458 -12.643498 -2.5758228
1.2629458 , -12.643498 , -2.5758228 , -11.343508 , -11.343508 2.3385992 -8.719341 14.213509 15.404744
2.3385992 , -8.719341 , 14.213509 , 15.404744 , -0.39327756 6.338786 2.688887 8.7104025 17.469526
-0.39327756, 6.338786 , 2.688887 , 8.7104025 , -8.77959 7.0576906 4.648855 -1.3089896 -23.294737
17.469526 , -8.77959 , 7.0576906 , 4.648855 , 8.013747 13.891729 -9.926753 5.655307 -5.9422326
-1.3089896 , -23.294737 , 8.013747 , 13.891729 , -22.842539 0.6293588 -18.46266 -10.811862 9.8192625
-9.926753 , 5.655307 , -5.9422326 , -22.842539 , 3.0070958 3.8072643 -2.3861165 3.0821571 -14.739942
0.6293588 , -18.46266 , -10.811862 , 9.8192625 , 1.7594414 -0.6485091 4.485623 2.0207152 7.264915
3.0070958 , 3.8072643 , -2.3861165 , 3.0821571 , -6.40137 23.63524 2.9711294 -22.708025 9.93719
-14.739942 , 1.7594414 , -0.6485091 , 4.485623 , 20.354511 -10.324688 -0.700492 -8.783211 -5.27593
2.0207152 , 7.264915 , -6.40137 , 23.63524 , 15.999649 3.3004563 12.747926 15.429879 4.7849145
2.9711294 , -22.708025 , 9.93719 , 20.354511 , 5.6699696 -2.3826702 10.605882 3.9112158 3.1500628
-10.324688 , -0.700492 , -8.783211 , -5.27593 , 15.859915 -2.1832209 -23.908653 -6.4799504 -4.5365124
15.999649 , 3.3004563 , 12.747926 , 15.429879 , -9.224193 14.568347 -10.568833 4.982321 -4.342062
4.7849145 , 5.6699696 , -2.3826702 , 10.605882 , 0.0914714 12.645902 -5.74285 -3.2141201 -2.7173362
3.9112158 , 3.1500628 , 15.859915 , -2.1832209 , -6.680575 0.4757669 -5.035051 -6.7964664 16.865469
-23.908653 , -6.4799504 , -4.5365124 , -9.224193 , -11.54324 7.681869 0.44475392 9.708182 -8.932846
14.568347 , -10.568833 , 4.982321 , -4.342062 , 0.4123232 -4.361452 1.3948607 9.511665 0.11667654
0.0914714 , 12.645902 , -5.74285 , -3.2141201 , 2.9079323 6.049952 9.275183 -18.078873 6.2983274
-2.7173362 , -6.680575 , 0.4757669 , -5.035051 , -0.7500531 -2.725033 -7.6027865 3.3404543 2.990815
-6.7964664 , 16.865469 , -11.54324 , 7.681869 , 4.010979 11.000591 -2.8873312 7.1352735 -16.79663
0.44475392, 9.708182 , -8.932846 , 0.4123232 , 18.495346 -14.293832 7.89578 2.2714825 22.976387
-4.361452 , 1.3948607 , 9.511665 , 0.11667654, -4.875734 -3.0836344 -2.9999814 13.751918 6.448228
2.9079323 , 6.049952 , 9.275183 , -18.078873 , -11.924197 2.171869 2.0423572 -6.173772 10.778437
6.2983274 , -0.7500531 , -2.725033 , -7.6027865 , 25.77281 -4.9495463 14.57806 0.3044315 2.6132357
3.3404543 , 2.990815 , 4.010979 , 11.000591 , -7.591999 -2.076944 9.025118 1.7834753 -3.1799617
-2.8873312 , 7.1352735 , -16.79663 , 18.495346 , -4.9401326 23.465864 5.1685796 -9.018578 9.037825
-14.293832 , 7.89578 , 2.2714825 , 22.976387 , -4.4150195 6.859591 -12.274467 -0.88911164 5.186309
-4.875734 , -3.0836344 , -2.9999814 , 13.751918 , -3.9988663 -13.638606 -9.925445 -0.06329413 -3.6709652
6.448228 , -11.924197 , 2.171869 , 2.0423572 , -12.397416 -12.719869 -1.395601 2.1150916 5.7381287
-6.173772 , 10.778437 , 25.77281 , -4.9495463 , -4.4691963 -3.82819 -0.84233856 -1.1604277 -13.490127
14.57806 , 0.3044315 , 2.6132357 , -7.591999 , 8.731719 -20.778936 -11.495662 5.8033476 -4.752041
-2.076944 , 9.025118 , 1.7834753 , -3.1799617 , 10.833007 -6.717991 4.504732 13.4244375 1.1306485
-4.9401326 , 23.465864 , 5.1685796 , -9.018578 , 7.3435574 1.400918 14.704036 -9.501399 7.2315617
9.037825 , -4.4150195 , 6.859591 , -12.274467 , -6.417456 1.3333273 11.872697 -0.30664724 8.8845
-0.88911164, 5.186309 , -3.9988663 , -13.638606 , 6.5569253 4.7948146 0.03662816 -8.704245 6.224871
-9.925445 , -0.06329413, -3.6709652 , -12.397416 , -3.2701402 -11.508579 ]
-12.719869 , -1.395601 , 2.1150916 , 5.7381287 ,
-4.4691963 , -3.82819 , -0.84233856, -1.1604277 ,
-13.490127 , 8.731719 , -20.778936 , -11.495662 ,
5.8033476 , -4.752041 , 10.833007 , -6.717991 ,
4.504732 , 13.4244375 , 1.1306485 , 7.3435574 ,
1.400918 , 14.704036 , -9.501399 , 7.2315617 ,
-6.417456 , 1.3333273 , 11.872697 , -0.30664724,
8.8845 , 6.5569253 , 4.7948146 , 0.03662816,
-8.704245 , 6.224871 , -3.2701402 , -11.508579 ],
dtype=float32)}
``` ```
- Python API - Python API
@ -118,55 +108,45 @@ wget -c https://paddlespeech.bj.bcebos.com/vector/audio/85236145389.wav
Output: Output:
```bash ```bash
# Vector Result: # Vector Result:
{'dim': 192, 'embedding': array([ -5.749211 , 9.505463 , -8.200284 , -5.2075014 , [ -5.749211 9.505463 -8.200284 -5.2075014 5.3940268
5.3940268 , -3.04878 , 1.611095 , 10.127234 , -3.04878 1.611095 10.127234 -10.534177 -15.821609
-10.534177 , -15.821609 , 1.2032688 , -0.35080156, 1.2032688 -0.35080156 1.2629458 -12.643498 -2.5758228
1.2629458 , -12.643498 , -2.5758228 , -11.343508 , -11.343508 2.3385992 -8.719341 14.213509 15.404744
2.3385992 , -8.719341 , 14.213509 , 15.404744 , -0.39327756 6.338786 2.688887 8.7104025 17.469526
-0.39327756, 6.338786 , 2.688887 , 8.7104025 , -8.77959 7.0576906 4.648855 -1.3089896 -23.294737
17.469526 , -8.77959 , 7.0576906 , 4.648855 , 8.013747 13.891729 -9.926753 5.655307 -5.9422326
-1.3089896 , -23.294737 , 8.013747 , 13.891729 , -22.842539 0.6293588 -18.46266 -10.811862 9.8192625
-9.926753 , 5.655307 , -5.9422326 , -22.842539 , 3.0070958 3.8072643 -2.3861165 3.0821571 -14.739942
0.6293588 , -18.46266 , -10.811862 , 9.8192625 , 1.7594414 -0.6485091 4.485623 2.0207152 7.264915
3.0070958 , 3.8072643 , -2.3861165 , 3.0821571 , -6.40137 23.63524 2.9711294 -22.708025 9.93719
-14.739942 , 1.7594414 , -0.6485091 , 4.485623 , 20.354511 -10.324688 -0.700492 -8.783211 -5.27593
2.0207152 , 7.264915 , -6.40137 , 23.63524 , 15.999649 3.3004563 12.747926 15.429879 4.7849145
2.9711294 , -22.708025 , 9.93719 , 20.354511 , 5.6699696 -2.3826702 10.605882 3.9112158 3.1500628
-10.324688 , -0.700492 , -8.783211 , -5.27593 , 15.859915 -2.1832209 -23.908653 -6.4799504 -4.5365124
15.999649 , 3.3004563 , 12.747926 , 15.429879 , -9.224193 14.568347 -10.568833 4.982321 -4.342062
4.7849145 , 5.6699696 , -2.3826702 , 10.605882 , 0.0914714 12.645902 -5.74285 -3.2141201 -2.7173362
3.9112158 , 3.1500628 , 15.859915 , -2.1832209 , -6.680575 0.4757669 -5.035051 -6.7964664 16.865469
-23.908653 , -6.4799504 , -4.5365124 , -9.224193 , -11.54324 7.681869 0.44475392 9.708182 -8.932846
14.568347 , -10.568833 , 4.982321 , -4.342062 , 0.4123232 -4.361452 1.3948607 9.511665 0.11667654
0.0914714 , 12.645902 , -5.74285 , -3.2141201 , 2.9079323 6.049952 9.275183 -18.078873 6.2983274
-2.7173362 , -6.680575 , 0.4757669 , -5.035051 , -0.7500531 -2.725033 -7.6027865 3.3404543 2.990815
-6.7964664 , 16.865469 , -11.54324 , 7.681869 , 4.010979 11.000591 -2.8873312 7.1352735 -16.79663
0.44475392, 9.708182 , -8.932846 , 0.4123232 , 18.495346 -14.293832 7.89578 2.2714825 22.976387
-4.361452 , 1.3948607 , 9.511665 , 0.11667654, -4.875734 -3.0836344 -2.9999814 13.751918 6.448228
2.9079323 , 6.049952 , 9.275183 , -18.078873 , -11.924197 2.171869 2.0423572 -6.173772 10.778437
6.2983274 , -0.7500531 , -2.725033 , -7.6027865 , 25.77281 -4.9495463 14.57806 0.3044315 2.6132357
3.3404543 , 2.990815 , 4.010979 , 11.000591 , -7.591999 -2.076944 9.025118 1.7834753 -3.1799617
-2.8873312 , 7.1352735 , -16.79663 , 18.495346 , -4.9401326 23.465864 5.1685796 -9.018578 9.037825
-14.293832 , 7.89578 , 2.2714825 , 22.976387 , -4.4150195 6.859591 -12.274467 -0.88911164 5.186309
-4.875734 , -3.0836344 , -2.9999814 , 13.751918 , -3.9988663 -13.638606 -9.925445 -0.06329413 -3.6709652
6.448228 , -11.924197 , 2.171869 , 2.0423572 , -12.397416 -12.719869 -1.395601 2.1150916 5.7381287
-6.173772 , 10.778437 , 25.77281 , -4.9495463 , -4.4691963 -3.82819 -0.84233856 -1.1604277 -13.490127
14.57806 , 0.3044315 , 2.6132357 , -7.591999 , 8.731719 -20.778936 -11.495662 5.8033476 -4.752041
-2.076944 , 9.025118 , 1.7834753 , -3.1799617 , 10.833007 -6.717991 4.504732 13.4244375 1.1306485
-4.9401326 , 23.465864 , 5.1685796 , -9.018578 , 7.3435574 1.400918 14.704036 -9.501399 7.2315617
9.037825 , -4.4150195 , 6.859591 , -12.274467 , -6.417456 1.3333273 11.872697 -0.30664724 8.8845
-0.88911164, 5.186309 , -3.9988663 , -13.638606 , 6.5569253 4.7948146 0.03662816 -8.704245 6.224871
-9.925445 , -0.06329413, -3.6709652 , -12.397416 , -3.2701402 -11.508579 ]
-12.719869 , -1.395601 , 2.1150916 , 5.7381287 ,
-4.4691963 , -3.82819 , -0.84233856, -1.1604277 ,
-13.490127 , 8.731719 , -20.778936 , -11.495662 ,
5.8033476 , -4.752041 , 10.833007 , -6.717991 ,
4.504732 , 13.4244375 , 1.1306485 , 7.3435574 ,
1.400918 , 14.704036 , -9.501399 , 7.2315617 ,
-6.417456 , 1.3333273 , 11.872697 , -0.30664724,
8.8845 , 6.5569253 , 4.7948146 , 0.03662816,
-8.704245 , 6.224871 , -3.2701402 , -11.508579 ],
dtype=float32)}
``` ```
### 4.Pretrained Models ### 4.Pretrained Models

@ -45,55 +45,45 @@ wget -c https://paddlespeech.bj.bcebos.com/vector/audio/85236145389.wav
输出: 输出:
```bash ```bash
demo {'dim': 192, 'embedding': array([ -5.749211 , 9.505463 , -8.200284 , -5.2075014 , demo [ -5.749211 9.505463 -8.200284 -5.2075014 5.3940268
5.3940268 , -3.04878 , 1.611095 , 10.127234 , -3.04878 1.611095 10.127234 -10.534177 -15.821609
-10.534177 , -15.821609 , 1.2032688 , -0.35080156, 1.2032688 -0.35080156 1.2629458 -12.643498 -2.5758228
1.2629458 , -12.643498 , -2.5758228 , -11.343508 , -11.343508 2.3385992 -8.719341 14.213509 15.404744
2.3385992 , -8.719341 , 14.213509 , 15.404744 , -0.39327756 6.338786 2.688887 8.7104025 17.469526
-0.39327756, 6.338786 , 2.688887 , 8.7104025 , -8.77959 7.0576906 4.648855 -1.3089896 -23.294737
17.469526 , -8.77959 , 7.0576906 , 4.648855 , 8.013747 13.891729 -9.926753 5.655307 -5.9422326
-1.3089896 , -23.294737 , 8.013747 , 13.891729 , -22.842539 0.6293588 -18.46266 -10.811862 9.8192625
-9.926753 , 5.655307 , -5.9422326 , -22.842539 , 3.0070958 3.8072643 -2.3861165 3.0821571 -14.739942
0.6293588 , -18.46266 , -10.811862 , 9.8192625 , 1.7594414 -0.6485091 4.485623 2.0207152 7.264915
3.0070958 , 3.8072643 , -2.3861165 , 3.0821571 , -6.40137 23.63524 2.9711294 -22.708025 9.93719
-14.739942 , 1.7594414 , -0.6485091 , 4.485623 , 20.354511 -10.324688 -0.700492 -8.783211 -5.27593
2.0207152 , 7.264915 , -6.40137 , 23.63524 , 15.999649 3.3004563 12.747926 15.429879 4.7849145
2.9711294 , -22.708025 , 9.93719 , 20.354511 , 5.6699696 -2.3826702 10.605882 3.9112158 3.1500628
-10.324688 , -0.700492 , -8.783211 , -5.27593 , 15.859915 -2.1832209 -23.908653 -6.4799504 -4.5365124
15.999649 , 3.3004563 , 12.747926 , 15.429879 , -9.224193 14.568347 -10.568833 4.982321 -4.342062
4.7849145 , 5.6699696 , -2.3826702 , 10.605882 , 0.0914714 12.645902 -5.74285 -3.2141201 -2.7173362
3.9112158 , 3.1500628 , 15.859915 , -2.1832209 , -6.680575 0.4757669 -5.035051 -6.7964664 16.865469
-23.908653 , -6.4799504 , -4.5365124 , -9.224193 , -11.54324 7.681869 0.44475392 9.708182 -8.932846
14.568347 , -10.568833 , 4.982321 , -4.342062 , 0.4123232 -4.361452 1.3948607 9.511665 0.11667654
0.0914714 , 12.645902 , -5.74285 , -3.2141201 , 2.9079323 6.049952 9.275183 -18.078873 6.2983274
-2.7173362 , -6.680575 , 0.4757669 , -5.035051 , -0.7500531 -2.725033 -7.6027865 3.3404543 2.990815
-6.7964664 , 16.865469 , -11.54324 , 7.681869 , 4.010979 11.000591 -2.8873312 7.1352735 -16.79663
0.44475392, 9.708182 , -8.932846 , 0.4123232 , 18.495346 -14.293832 7.89578 2.2714825 22.976387
-4.361452 , 1.3948607 , 9.511665 , 0.11667654, -4.875734 -3.0836344 -2.9999814 13.751918 6.448228
2.9079323 , 6.049952 , 9.275183 , -18.078873 , -11.924197 2.171869 2.0423572 -6.173772 10.778437
6.2983274 , -0.7500531 , -2.725033 , -7.6027865 , 25.77281 -4.9495463 14.57806 0.3044315 2.6132357
3.3404543 , 2.990815 , 4.010979 , 11.000591 , -7.591999 -2.076944 9.025118 1.7834753 -3.1799617
-2.8873312 , 7.1352735 , -16.79663 , 18.495346 , -4.9401326 23.465864 5.1685796 -9.018578 9.037825
-14.293832 , 7.89578 , 2.2714825 , 22.976387 , -4.4150195 6.859591 -12.274467 -0.88911164 5.186309
-4.875734 , -3.0836344 , -2.9999814 , 13.751918 , -3.9988663 -13.638606 -9.925445 -0.06329413 -3.6709652
6.448228 , -11.924197 , 2.171869 , 2.0423572 , -12.397416 -12.719869 -1.395601 2.1150916 5.7381287
-6.173772 , 10.778437 , 25.77281 , -4.9495463 , -4.4691963 -3.82819 -0.84233856 -1.1604277 -13.490127
14.57806 , 0.3044315 , 2.6132357 , -7.591999 , 8.731719 -20.778936 -11.495662 5.8033476 -4.752041
-2.076944 , 9.025118 , 1.7834753 , -3.1799617 , 10.833007 -6.717991 4.504732 13.4244375 1.1306485
-4.9401326 , 23.465864 , 5.1685796 , -9.018578 , 7.3435574 1.400918 14.704036 -9.501399 7.2315617
9.037825 , -4.4150195 , 6.859591 , -12.274467 , -6.417456 1.3333273 11.872697 -0.30664724 8.8845
-0.88911164, 5.186309 , -3.9988663 , -13.638606 , 6.5569253 4.7948146 0.03662816 -8.704245 6.224871
-9.925445 , -0.06329413, -3.6709652 , -12.397416 , -3.2701402 -11.508579 ]
-12.719869 , -1.395601 , 2.1150916 , 5.7381287 ,
-4.4691963 , -3.82819 , -0.84233856, -1.1604277 ,
-13.490127 , 8.731719 , -20.778936 , -11.495662 ,
5.8033476 , -4.752041 , 10.833007 , -6.717991 ,
4.504732 , 13.4244375 , 1.1306485 , 7.3435574 ,
1.400918 , 14.704036 , -9.501399 , 7.2315617 ,
-6.417456 , 1.3333273 , 11.872697 , -0.30664724,
8.8845 , 6.5569253 , 4.7948146 , 0.03662816,
-8.704245 , 6.224871 , -3.2701402 , -11.508579 ],
dtype=float32)}
``` ```
- Python API - Python API
@ -116,55 +106,45 @@ wget -c https://paddlespeech.bj.bcebos.com/vector/audio/85236145389.wav
输出: 输出:
```bash ```bash
# Vector Result: # Vector Result:
{'dim': 192, 'embedding': array([ -5.749211 , 9.505463 , -8.200284 , -5.2075014 , [ -5.749211 9.505463 -8.200284 -5.2075014 5.3940268
5.3940268 , -3.04878 , 1.611095 , 10.127234 , -3.04878 1.611095 10.127234 -10.534177 -15.821609
-10.534177 , -15.821609 , 1.2032688 , -0.35080156, 1.2032688 -0.35080156 1.2629458 -12.643498 -2.5758228
1.2629458 , -12.643498 , -2.5758228 , -11.343508 , -11.343508 2.3385992 -8.719341 14.213509 15.404744
2.3385992 , -8.719341 , 14.213509 , 15.404744 , -0.39327756 6.338786 2.688887 8.7104025 17.469526
-0.39327756, 6.338786 , 2.688887 , 8.7104025 , -8.77959 7.0576906 4.648855 -1.3089896 -23.294737
17.469526 , -8.77959 , 7.0576906 , 4.648855 , 8.013747 13.891729 -9.926753 5.655307 -5.9422326
-1.3089896 , -23.294737 , 8.013747 , 13.891729 , -22.842539 0.6293588 -18.46266 -10.811862 9.8192625
-9.926753 , 5.655307 , -5.9422326 , -22.842539 , 3.0070958 3.8072643 -2.3861165 3.0821571 -14.739942
0.6293588 , -18.46266 , -10.811862 , 9.8192625 , 1.7594414 -0.6485091 4.485623 2.0207152 7.264915
3.0070958 , 3.8072643 , -2.3861165 , 3.0821571 , -6.40137 23.63524 2.9711294 -22.708025 9.93719
-14.739942 , 1.7594414 , -0.6485091 , 4.485623 , 20.354511 -10.324688 -0.700492 -8.783211 -5.27593
2.0207152 , 7.264915 , -6.40137 , 23.63524 , 15.999649 3.3004563 12.747926 15.429879 4.7849145
2.9711294 , -22.708025 , 9.93719 , 20.354511 , 5.6699696 -2.3826702 10.605882 3.9112158 3.1500628
-10.324688 , -0.700492 , -8.783211 , -5.27593 , 15.859915 -2.1832209 -23.908653 -6.4799504 -4.5365124
15.999649 , 3.3004563 , 12.747926 , 15.429879 , -9.224193 14.568347 -10.568833 4.982321 -4.342062
4.7849145 , 5.6699696 , -2.3826702 , 10.605882 , 0.0914714 12.645902 -5.74285 -3.2141201 -2.7173362
3.9112158 , 3.1500628 , 15.859915 , -2.1832209 , -6.680575 0.4757669 -5.035051 -6.7964664 16.865469
-23.908653 , -6.4799504 , -4.5365124 , -9.224193 , -11.54324 7.681869 0.44475392 9.708182 -8.932846
14.568347 , -10.568833 , 4.982321 , -4.342062 , 0.4123232 -4.361452 1.3948607 9.511665 0.11667654
0.0914714 , 12.645902 , -5.74285 , -3.2141201 , 2.9079323 6.049952 9.275183 -18.078873 6.2983274
-2.7173362 , -6.680575 , 0.4757669 , -5.035051 , -0.7500531 -2.725033 -7.6027865 3.3404543 2.990815
-6.7964664 , 16.865469 , -11.54324 , 7.681869 , 4.010979 11.000591 -2.8873312 7.1352735 -16.79663
0.44475392, 9.708182 , -8.932846 , 0.4123232 , 18.495346 -14.293832 7.89578 2.2714825 22.976387
-4.361452 , 1.3948607 , 9.511665 , 0.11667654, -4.875734 -3.0836344 -2.9999814 13.751918 6.448228
2.9079323 , 6.049952 , 9.275183 , -18.078873 , -11.924197 2.171869 2.0423572 -6.173772 10.778437
6.2983274 , -0.7500531 , -2.725033 , -7.6027865 , 25.77281 -4.9495463 14.57806 0.3044315 2.6132357
3.3404543 , 2.990815 , 4.010979 , 11.000591 , -7.591999 -2.076944 9.025118 1.7834753 -3.1799617
-2.8873312 , 7.1352735 , -16.79663 , 18.495346 , -4.9401326 23.465864 5.1685796 -9.018578 9.037825
-14.293832 , 7.89578 , 2.2714825 , 22.976387 , -4.4150195 6.859591 -12.274467 -0.88911164 5.186309
-4.875734 , -3.0836344 , -2.9999814 , 13.751918 , -3.9988663 -13.638606 -9.925445 -0.06329413 -3.6709652
6.448228 , -11.924197 , 2.171869 , 2.0423572 , -12.397416 -12.719869 -1.395601 2.1150916 5.7381287
-6.173772 , 10.778437 , 25.77281 , -4.9495463 , -4.4691963 -3.82819 -0.84233856 -1.1604277 -13.490127
14.57806 , 0.3044315 , 2.6132357 , -7.591999 , 8.731719 -20.778936 -11.495662 5.8033476 -4.752041
-2.076944 , 9.025118 , 1.7834753 , -3.1799617 , 10.833007 -6.717991 4.504732 13.4244375 1.1306485
-4.9401326 , 23.465864 , 5.1685796 , -9.018578 , 7.3435574 1.400918 14.704036 -9.501399 7.2315617
9.037825 , -4.4150195 , 6.859591 , -12.274467 , -6.417456 1.3333273 11.872697 -0.30664724 8.8845
-0.88911164, 5.186309 , -3.9988663 , -13.638606 , 6.5569253 4.7948146 0.03662816 -8.704245 6.224871
-9.925445 , -0.06329413, -3.6709652 , -12.397416 , -3.2701402 -11.508579 ]
-12.719869 , -1.395601 , 2.1150916 , 5.7381287 ,
-4.4691963 , -3.82819 , -0.84233856, -1.1604277 ,
-13.490127 , 8.731719 , -20.778936 , -11.495662 ,
5.8033476 , -4.752041 , 10.833007 , -6.717991 ,
4.504732 , 13.4244375 , 1.1306485 , 7.3435574 ,
1.400918 , 14.704036 , -9.501399 , 7.2315617 ,
-6.417456 , 1.3333273 , 11.872697 , -0.30664724,
8.8845 , 6.5569253 , 4.7948146 , 0.03662816,
-8.704245 , 6.224871 , -3.2701402 , -11.508579 ],
dtype=float32)}
``` ```
### 4.预训练模型 ### 4.预训练模型

@ -48,9 +48,3 @@ You can do the conversion using ffmpeg https://gist.github.com/seungwonpark/4f2
|VoxCeleb1-H(cleaned) |list_test_hard2.txt | 550894 | 275488 | 275406 | |VoxCeleb1-H(cleaned) |list_test_hard2.txt | 550894 | 275488 | 275406 |
|VoxCeleb1-E | list_test_all.txt | 581480 | 290743 | 290737 | |VoxCeleb1-E | list_test_all.txt | 581480 | 290743 | 290737 |
|VoxCeleb1-E(cleaned) | list_test_all2.txt |579818 |289921 |289897 | |VoxCeleb1-E(cleaned) | list_test_all2.txt |579818 |289921 |289897 |

@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import argparse import argparse
import os
import paddle import paddle
from yacs.config import CfgNode from yacs.config import CfgNode

@ -13,7 +13,7 @@
# limitations under the License. # limitations under the License.
from .esc50 import ESC50 from .esc50 import ESC50
from .gtzan import GTZAN from .gtzan import GTZAN
from .rirs_noises import OpenRIRNoise
from .tess import TESS from .tess import TESS
from .urban_sound import UrbanSound8K from .urban_sound import UrbanSound8K
from .voxceleb import VoxCeleb from .voxceleb import VoxCeleb
from .rirs_noises import OpenRIRNoise

@ -13,12 +13,9 @@
# limitations under the License. # limitations under the License.
import collections import collections
import csv import csv
import glob
import os import os
import random import random
from typing import Dict
from typing import List from typing import List
from typing import Tuple
from paddle.io import Dataset from paddle.io import Dataset
from tqdm import tqdm from tqdm import tqdm
@ -26,7 +23,6 @@ from tqdm import tqdm
from ..backends import load as load_audio from ..backends import load as load_audio
from ..backends import save as save_wav from ..backends import save as save_wav
from ..utils import DATA_HOME from ..utils import DATA_HOME
from ..utils import decompress
from ..utils.download import download_and_decompress from ..utils.download import download_and_decompress
from .dataset import feat_funcs from .dataset import feat_funcs

@ -17,9 +17,7 @@ import glob
import os import os
import random import random
from multiprocessing import cpu_count from multiprocessing import cpu_count
from typing import Dict
from typing import List from typing import List
from typing import Tuple
from paddle.io import Dataset from paddle.io import Dataset
from pathos.multiprocessing import Pool from pathos.multiprocessing import Pool
@ -135,7 +133,7 @@ class VoxCeleb(Dataset):
# so, we check the vox1/wav dir status # so, we check the vox1/wav dir status
print(f"wav base path: {self.wav_path}") print(f"wav base path: {self.wav_path}")
if not os.path.isdir(self.wav_path): if not os.path.isdir(self.wav_path):
print(f"start to download the voxceleb1 dataset") print("start to download the voxceleb1 dataset")
download_and_decompress( # multi-zip parts concatenate to vox1_dev_wav.zip download_and_decompress( # multi-zip parts concatenate to vox1_dev_wav.zip
self.archieves_audio_dev, self.archieves_audio_dev,
self.base_path, self.base_path,

@ -82,7 +82,10 @@ class VectorExecutor(BaseExecutor):
choices=["spk"], choices=["spk"],
help="task type in vector domain") help="task type in vector domain")
self.parser.add_argument( self.parser.add_argument(
"--input", type=str, default=None, help="Audio file to extract embedding.") "--input",
type=str,
default=None,
help="Audio file to extract embedding.")
self.parser.add_argument( self.parser.add_argument(
"--sample_rate", "--sample_rate",
type=int, type=int,
@ -344,8 +347,7 @@ class VectorExecutor(BaseExecutor):
Union[str, os.PathLike]: audio embedding info Union[str, os.PathLike]: audio embedding info
""" """
embedding = self._outputs["embedding"] embedding = self._outputs["embedding"]
dim = embedding.shape[0] return embedding
return {"dim": dim, "embedding": embedding}
def preprocess(self, model_type: str, input_file: Union[str, os.PathLike]): def preprocess(self, model_type: str, input_file: Union[str, os.PathLike]):
"""Extract the audio feat """Extract the audio feat

@ -12,12 +12,10 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import argparse import argparse
import ast
import os import os
import numpy as np import numpy as np
import paddle import paddle
import paddle.nn.functional as F
from paddle.io import BatchSampler from paddle.io import BatchSampler
from paddle.io import DataLoader from paddle.io import DataLoader
from tqdm import tqdm from tqdm import tqdm

@ -14,7 +14,6 @@
# this is modified from SpeechBrain # this is modified from SpeechBrain
# https://github.com/speechbrain/speechbrain/blob/085be635c07f16d42cd1295045bc46c407f1e15b/speechbrain/lobes/augment.py # https://github.com/speechbrain/speechbrain/blob/085be635c07f16d42cd1295045bc46c407f1e15b/speechbrain/lobes/augment.py
import math import math
import os
from typing import List from typing import List
import numpy as np import numpy as np
@ -22,7 +21,6 @@ import paddle
import paddle.nn as nn import paddle.nn as nn
import paddle.nn.functional as F import paddle.nn.functional as F
from paddleaudio import load as load_audio
from paddleaudio.datasets.rirs_noises import OpenRIRNoise from paddleaudio.datasets.rirs_noises import OpenRIRNoise
from paddlespeech.s2t.utils.log import Log from paddlespeech.s2t.utils.log import Log
from paddlespeech.vector.io.signal_processing import compute_amplitude from paddlespeech.vector.io.signal_processing import compute_amplitude

@ -11,8 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import math
import numpy as np import numpy as np
import paddle import paddle

@ -1,3 +1,18 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
def pytest_addoption(parser): def pytest_addoption(parser):
parser.addoption("--device", action="store", default="cpu") parser.addoption("--device", action="store", default="cpu")
@ -8,4 +23,3 @@ def pytest_generate_tests(metafunc):
option_value = metafunc.config.option.device option_value = metafunc.config.option.device
if "device" in metafunc.fixturenames and option_value is not None: if "device" in metafunc.fixturenames and option_value is not None:
metafunc.parametrize("device", [option_value]) metafunc.parametrize("device", [option_value])

@ -11,15 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import os
import numpy as np
import paddle import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.io import BatchSampler
from paddle.io import DataLoader
from paddle.io import Dataset
def test_add_noise(tmpdir, device): def test_add_noise(tmpdir, device):

Loading…
Cancel
Save