change the vector output to numpy.array

pull/1614/head
xiongxinlei 3 years ago
parent 5ae57206f3
commit ed7113f320

@ -63,13 +63,15 @@ TEST_TARGET_DATA = "vox1_test_wav.zip vox1_test_wav.zip 185fdc63c3c739954633d503
TRIAL_BASE_URL = "https://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta/"
TRIAL_LIST = {
"veri_test.txt": "29fc7cc1c5d59f0816dc15d6e8be60f7", # voxceleb1
"veri_test2.txt": "b73110731c9223c1461fe49cb48dddfc", # voxceleb1(cleaned)
"list_test_hard.txt": "21c341b6b2168eea2634df0fb4b8fff1", # voxceleb1-H
"list_test_hard2.txt": "857790e09d579a68eb2e339a090343c8", # voxceleb1-H(cleaned)
"list_test_all.txt": "b9ecf7aa49d4b656aa927a8092844e4a", # voxceleb1-E
"list_test_all2.txt": "a53e059deb562ffcfc092bf5d90d9f3a" # voxceleb1-E(cleaned)
}
"veri_test.txt": "29fc7cc1c5d59f0816dc15d6e8be60f7", # voxceleb1
"veri_test2.txt": "b73110731c9223c1461fe49cb48dddfc", # voxceleb1(cleaned)
"list_test_hard.txt": "21c341b6b2168eea2634df0fb4b8fff1", # voxceleb1-H
"list_test_hard2.txt":
"857790e09d579a68eb2e339a090343c8", # voxceleb1-H(cleaned)
"list_test_all.txt": "b9ecf7aa49d4b656aa927a8092844e4a", # voxceleb1-E
"list_test_all2.txt":
"a53e059deb562ffcfc092bf5d90d9f3a" # voxceleb1-E(cleaned)
}
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
@ -176,6 +178,7 @@ def prepare_dataset(base_url, data_list, target_dir, manifest_path,
# create the manifest file
create_manifest(data_dir=target_dir, manifest_path_prefix=manifest_path)
def prepare_trial(base_url, data_list, target_dir):
if not os.path.exists(target_dir):
os.makedirs(target_dir)
@ -185,6 +188,8 @@ def prepare_trial(base_url, data_list, target_dir):
if not os.path.exists(os.path.join(target_dir, trial)):
download_url = " --no-check-certificate " + base_url + "/" + trial
download(url=download_url, md5sum=md5sum, target_dir=target_dir)
def main():
if args.target_dir.startswith('~'):
args.target_dir = os.path.expanduser(args.target_dir)
@ -209,8 +214,7 @@ def main():
prepare_trial(
base_url=TRIAL_BASE_URL,
data_list=TRIAL_LIST,
target_dir=os.path.dirname(args.manifest_prefix)
)
target_dir=os.path.dirname(args.manifest_prefix))
print("Manifest prepare done!")

@ -22,12 +22,10 @@ import codecs
import glob
import json
import os
import subprocess
from pathlib import Path
import soundfile
from utils.utility import check_md5sum
from utils.utility import download
from utils.utility import unzip
@ -40,9 +38,8 @@ BASE_URL = "--no-check-certificate https://www.robots.ox.ac.uk/~vgg/data/voxcele
DEV_DATA_URL = BASE_URL + '/vox2_aac.zip'
DEV_MD5SUM = "bbc063c46078a602ca71605645c2a402"
# test data
TEST_DATA_URL = BASE_URL + '/vox2_test_aac.zip'
TEST_DATA_URL = BASE_URL + '/vox2_test_aac.zip'
TEST_MD5SUM = "0d2b3ea430a821c33263b5ea37ede312"
parser = argparse.ArgumentParser(description=__doc__)
@ -56,14 +53,16 @@ parser.add_argument(
default="manifest",
type=str,
help="Filepath prefix for output manifests. (default: %(default)s)")
parser.add_argument("--download",
default=False,
action="store_true",
help="Download the voxceleb2 dataset. (default: %(default)s)")
parser.add_argument("--generate",
default=False,
action="store_true",
help="Generate the manifest files. (default: %(default)s)")
parser.add_argument(
"--download",
default=False,
action="store_true",
help="Download the voxceleb2 dataset. (default: %(default)s)")
parser.add_argument(
"--generate",
default=False,
action="store_true",
help="Generate the manifest files. (default: %(default)s)")
args = parser.parse_args()
@ -157,7 +156,9 @@ def main():
print("VoxCeleb2 download is done!")
if args.generate:
create_manifest(args.target_dir, manifest_path_prefix=args.manifest_prefix)
create_manifest(
args.target_dir, manifest_path_prefix=args.manifest_prefix)
if __name__ == '__main__':
main()

@ -26,8 +26,9 @@ def get_audios(path):
"""
supported_formats = [".wav", ".mp3", ".ogg", ".flac", ".m4a"]
return [
item for sublist in [[os.path.join(dir, file) for file in files]
for dir, _, files in list(os.walk(path))]
item
for sublist in [[os.path.join(dir, file) for file in files]
for dir, _, files in list(os.walk(path))]
for item in sublist if os.path.splitext(item)[1] in supported_formats
]

@ -46,56 +46,46 @@ wget -c https://paddlespeech.bj.bcebos.com/vector/audio/85236145389.wav
Output:
```bash
demo {'dim': 192, 'embedding': array([ -5.749211 , 9.505463 , -8.200284 , -5.2075014 ,
5.3940268 , -3.04878 , 1.611095 , 10.127234 ,
-10.534177 , -15.821609 , 1.2032688 , -0.35080156,
1.2629458 , -12.643498 , -2.5758228 , -11.343508 ,
2.3385992 , -8.719341 , 14.213509 , 15.404744 ,
-0.39327756, 6.338786 , 2.688887 , 8.7104025 ,
17.469526 , -8.77959 , 7.0576906 , 4.648855 ,
-1.3089896 , -23.294737 , 8.013747 , 13.891729 ,
-9.926753 , 5.655307 , -5.9422326 , -22.842539 ,
0.6293588 , -18.46266 , -10.811862 , 9.8192625 ,
3.0070958 , 3.8072643 , -2.3861165 , 3.0821571 ,
-14.739942 , 1.7594414 , -0.6485091 , 4.485623 ,
2.0207152 , 7.264915 , -6.40137 , 23.63524 ,
2.9711294 , -22.708025 , 9.93719 , 20.354511 ,
-10.324688 , -0.700492 , -8.783211 , -5.27593 ,
15.999649 , 3.3004563 , 12.747926 , 15.429879 ,
4.7849145 , 5.6699696 , -2.3826702 , 10.605882 ,
3.9112158 , 3.1500628 , 15.859915 , -2.1832209 ,
-23.908653 , -6.4799504 , -4.5365124 , -9.224193 ,
14.568347 , -10.568833 , 4.982321 , -4.342062 ,
0.0914714 , 12.645902 , -5.74285 , -3.2141201 ,
-2.7173362 , -6.680575 , 0.4757669 , -5.035051 ,
-6.7964664 , 16.865469 , -11.54324 , 7.681869 ,
0.44475392, 9.708182 , -8.932846 , 0.4123232 ,
-4.361452 , 1.3948607 , 9.511665 , 0.11667654,
2.9079323 , 6.049952 , 9.275183 , -18.078873 ,
6.2983274 , -0.7500531 , -2.725033 , -7.6027865 ,
3.3404543 , 2.990815 , 4.010979 , 11.000591 ,
-2.8873312 , 7.1352735 , -16.79663 , 18.495346 ,
-14.293832 , 7.89578 , 2.2714825 , 22.976387 ,
-4.875734 , -3.0836344 , -2.9999814 , 13.751918 ,
6.448228 , -11.924197 , 2.171869 , 2.0423572 ,
-6.173772 , 10.778437 , 25.77281 , -4.9495463 ,
14.57806 , 0.3044315 , 2.6132357 , -7.591999 ,
-2.076944 , 9.025118 , 1.7834753 , -3.1799617 ,
-4.9401326 , 23.465864 , 5.1685796 , -9.018578 ,
9.037825 , -4.4150195 , 6.859591 , -12.274467 ,
-0.88911164, 5.186309 , -3.9988663 , -13.638606 ,
-9.925445 , -0.06329413, -3.6709652 , -12.397416 ,
-12.719869 , -1.395601 , 2.1150916 , 5.7381287 ,
-4.4691963 , -3.82819 , -0.84233856, -1.1604277 ,
-13.490127 , 8.731719 , -20.778936 , -11.495662 ,
5.8033476 , -4.752041 , 10.833007 , -6.717991 ,
4.504732 , 13.4244375 , 1.1306485 , 7.3435574 ,
1.400918 , 14.704036 , -9.501399 , 7.2315617 ,
-6.417456 , 1.3333273 , 11.872697 , -0.30664724,
8.8845 , 6.5569253 , 4.7948146 , 0.03662816,
-8.704245 , 6.224871 , -3.2701402 , -11.508579 ],
dtype=float32)}
```bash
demo [ -5.749211 9.505463 -8.200284 -5.2075014 5.3940268
-3.04878 1.611095 10.127234 -10.534177 -15.821609
1.2032688 -0.35080156 1.2629458 -12.643498 -2.5758228
-11.343508 2.3385992 -8.719341 14.213509 15.404744
-0.39327756 6.338786 2.688887 8.7104025 17.469526
-8.77959 7.0576906 4.648855 -1.3089896 -23.294737
8.013747 13.891729 -9.926753 5.655307 -5.9422326
-22.842539 0.6293588 -18.46266 -10.811862 9.8192625
3.0070958 3.8072643 -2.3861165 3.0821571 -14.739942
1.7594414 -0.6485091 4.485623 2.0207152 7.264915
-6.40137 23.63524 2.9711294 -22.708025 9.93719
20.354511 -10.324688 -0.700492 -8.783211 -5.27593
15.999649 3.3004563 12.747926 15.429879 4.7849145
5.6699696 -2.3826702 10.605882 3.9112158 3.1500628
15.859915 -2.1832209 -23.908653 -6.4799504 -4.5365124
-9.224193 14.568347 -10.568833 4.982321 -4.342062
0.0914714 12.645902 -5.74285 -3.2141201 -2.7173362
-6.680575 0.4757669 -5.035051 -6.7964664 16.865469
-11.54324 7.681869 0.44475392 9.708182 -8.932846
0.4123232 -4.361452 1.3948607 9.511665 0.11667654
2.9079323 6.049952 9.275183 -18.078873 6.2983274
-0.7500531 -2.725033 -7.6027865 3.3404543 2.990815
4.010979 11.000591 -2.8873312 7.1352735 -16.79663
18.495346 -14.293832 7.89578 2.2714825 22.976387
-4.875734 -3.0836344 -2.9999814 13.751918 6.448228
-11.924197 2.171869 2.0423572 -6.173772 10.778437
25.77281 -4.9495463 14.57806 0.3044315 2.6132357
-7.591999 -2.076944 9.025118 1.7834753 -3.1799617
-4.9401326 23.465864 5.1685796 -9.018578 9.037825
-4.4150195 6.859591 -12.274467 -0.88911164 5.186309
-3.9988663 -13.638606 -9.925445 -0.06329413 -3.6709652
-12.397416 -12.719869 -1.395601 2.1150916 5.7381287
-4.4691963 -3.82819 -0.84233856 -1.1604277 -13.490127
8.731719 -20.778936 -11.495662 5.8033476 -4.752041
10.833007 -6.717991 4.504732 13.4244375 1.1306485
7.3435574 1.400918 14.704036 -9.501399 7.2315617
-6.417456 1.3333273 11.872697 -0.30664724 8.8845
6.5569253 4.7948146 0.03662816 -8.704245 6.224871
-3.2701402 -11.508579 ]
```
- Python API
@ -118,55 +108,45 @@ wget -c https://paddlespeech.bj.bcebos.com/vector/audio/85236145389.wav
Output:
```bash
# Vector Result:
{'dim': 192, 'embedding': array([ -5.749211 , 9.505463 , -8.200284 , -5.2075014 ,
5.3940268 , -3.04878 , 1.611095 , 10.127234 ,
-10.534177 , -15.821609 , 1.2032688 , -0.35080156,
1.2629458 , -12.643498 , -2.5758228 , -11.343508 ,
2.3385992 , -8.719341 , 14.213509 , 15.404744 ,
-0.39327756, 6.338786 , 2.688887 , 8.7104025 ,
17.469526 , -8.77959 , 7.0576906 , 4.648855 ,
-1.3089896 , -23.294737 , 8.013747 , 13.891729 ,
-9.926753 , 5.655307 , -5.9422326 , -22.842539 ,
0.6293588 , -18.46266 , -10.811862 , 9.8192625 ,
3.0070958 , 3.8072643 , -2.3861165 , 3.0821571 ,
-14.739942 , 1.7594414 , -0.6485091 , 4.485623 ,
2.0207152 , 7.264915 , -6.40137 , 23.63524 ,
2.9711294 , -22.708025 , 9.93719 , 20.354511 ,
-10.324688 , -0.700492 , -8.783211 , -5.27593 ,
15.999649 , 3.3004563 , 12.747926 , 15.429879 ,
4.7849145 , 5.6699696 , -2.3826702 , 10.605882 ,
3.9112158 , 3.1500628 , 15.859915 , -2.1832209 ,
-23.908653 , -6.4799504 , -4.5365124 , -9.224193 ,
14.568347 , -10.568833 , 4.982321 , -4.342062 ,
0.0914714 , 12.645902 , -5.74285 , -3.2141201 ,
-2.7173362 , -6.680575 , 0.4757669 , -5.035051 ,
-6.7964664 , 16.865469 , -11.54324 , 7.681869 ,
0.44475392, 9.708182 , -8.932846 , 0.4123232 ,
-4.361452 , 1.3948607 , 9.511665 , 0.11667654,
2.9079323 , 6.049952 , 9.275183 , -18.078873 ,
6.2983274 , -0.7500531 , -2.725033 , -7.6027865 ,
3.3404543 , 2.990815 , 4.010979 , 11.000591 ,
-2.8873312 , 7.1352735 , -16.79663 , 18.495346 ,
-14.293832 , 7.89578 , 2.2714825 , 22.976387 ,
-4.875734 , -3.0836344 , -2.9999814 , 13.751918 ,
6.448228 , -11.924197 , 2.171869 , 2.0423572 ,
-6.173772 , 10.778437 , 25.77281 , -4.9495463 ,
14.57806 , 0.3044315 , 2.6132357 , -7.591999 ,
-2.076944 , 9.025118 , 1.7834753 , -3.1799617 ,
-4.9401326 , 23.465864 , 5.1685796 , -9.018578 ,
9.037825 , -4.4150195 , 6.859591 , -12.274467 ,
-0.88911164, 5.186309 , -3.9988663 , -13.638606 ,
-9.925445 , -0.06329413, -3.6709652 , -12.397416 ,
-12.719869 , -1.395601 , 2.1150916 , 5.7381287 ,
-4.4691963 , -3.82819 , -0.84233856, -1.1604277 ,
-13.490127 , 8.731719 , -20.778936 , -11.495662 ,
5.8033476 , -4.752041 , 10.833007 , -6.717991 ,
4.504732 , 13.4244375 , 1.1306485 , 7.3435574 ,
1.400918 , 14.704036 , -9.501399 , 7.2315617 ,
-6.417456 , 1.3333273 , 11.872697 , -0.30664724,
8.8845 , 6.5569253 , 4.7948146 , 0.03662816,
-8.704245 , 6.224871 , -3.2701402 , -11.508579 ],
dtype=float32)}
[ -5.749211 9.505463 -8.200284 -5.2075014 5.3940268
-3.04878 1.611095 10.127234 -10.534177 -15.821609
1.2032688 -0.35080156 1.2629458 -12.643498 -2.5758228
-11.343508 2.3385992 -8.719341 14.213509 15.404744
-0.39327756 6.338786 2.688887 8.7104025 17.469526
-8.77959 7.0576906 4.648855 -1.3089896 -23.294737
8.013747 13.891729 -9.926753 5.655307 -5.9422326
-22.842539 0.6293588 -18.46266 -10.811862 9.8192625
3.0070958 3.8072643 -2.3861165 3.0821571 -14.739942
1.7594414 -0.6485091 4.485623 2.0207152 7.264915
-6.40137 23.63524 2.9711294 -22.708025 9.93719
20.354511 -10.324688 -0.700492 -8.783211 -5.27593
15.999649 3.3004563 12.747926 15.429879 4.7849145
5.6699696 -2.3826702 10.605882 3.9112158 3.1500628
15.859915 -2.1832209 -23.908653 -6.4799504 -4.5365124
-9.224193 14.568347 -10.568833 4.982321 -4.342062
0.0914714 12.645902 -5.74285 -3.2141201 -2.7173362
-6.680575 0.4757669 -5.035051 -6.7964664 16.865469
-11.54324 7.681869 0.44475392 9.708182 -8.932846
0.4123232 -4.361452 1.3948607 9.511665 0.11667654
2.9079323 6.049952 9.275183 -18.078873 6.2983274
-0.7500531 -2.725033 -7.6027865 3.3404543 2.990815
4.010979 11.000591 -2.8873312 7.1352735 -16.79663
18.495346 -14.293832 7.89578 2.2714825 22.976387
-4.875734 -3.0836344 -2.9999814 13.751918 6.448228
-11.924197 2.171869 2.0423572 -6.173772 10.778437
25.77281 -4.9495463 14.57806 0.3044315 2.6132357
-7.591999 -2.076944 9.025118 1.7834753 -3.1799617
-4.9401326 23.465864 5.1685796 -9.018578 9.037825
-4.4150195 6.859591 -12.274467 -0.88911164 5.186309
-3.9988663 -13.638606 -9.925445 -0.06329413 -3.6709652
-12.397416 -12.719869 -1.395601 2.1150916 5.7381287
-4.4691963 -3.82819 -0.84233856 -1.1604277 -13.490127
8.731719 -20.778936 -11.495662 5.8033476 -4.752041
10.833007 -6.717991 4.504732 13.4244375 1.1306485
7.3435574 1.400918 14.704036 -9.501399 7.2315617
-6.417456 1.3333273 11.872697 -0.30664724 8.8845
6.5569253 4.7948146 0.03662816 -8.704245 6.224871
-3.2701402 -11.508579 ]
```
### 4.Pretrained Models

@ -45,55 +45,45 @@ wget -c https://paddlespeech.bj.bcebos.com/vector/audio/85236145389.wav
输出:
```bash
demo {'dim': 192, 'embedding': array([ -5.749211 , 9.505463 , -8.200284 , -5.2075014 ,
5.3940268 , -3.04878 , 1.611095 , 10.127234 ,
-10.534177 , -15.821609 , 1.2032688 , -0.35080156,
1.2629458 , -12.643498 , -2.5758228 , -11.343508 ,
2.3385992 , -8.719341 , 14.213509 , 15.404744 ,
-0.39327756, 6.338786 , 2.688887 , 8.7104025 ,
17.469526 , -8.77959 , 7.0576906 , 4.648855 ,
-1.3089896 , -23.294737 , 8.013747 , 13.891729 ,
-9.926753 , 5.655307 , -5.9422326 , -22.842539 ,
0.6293588 , -18.46266 , -10.811862 , 9.8192625 ,
3.0070958 , 3.8072643 , -2.3861165 , 3.0821571 ,
-14.739942 , 1.7594414 , -0.6485091 , 4.485623 ,
2.0207152 , 7.264915 , -6.40137 , 23.63524 ,
2.9711294 , -22.708025 , 9.93719 , 20.354511 ,
-10.324688 , -0.700492 , -8.783211 , -5.27593 ,
15.999649 , 3.3004563 , 12.747926 , 15.429879 ,
4.7849145 , 5.6699696 , -2.3826702 , 10.605882 ,
3.9112158 , 3.1500628 , 15.859915 , -2.1832209 ,
-23.908653 , -6.4799504 , -4.5365124 , -9.224193 ,
14.568347 , -10.568833 , 4.982321 , -4.342062 ,
0.0914714 , 12.645902 , -5.74285 , -3.2141201 ,
-2.7173362 , -6.680575 , 0.4757669 , -5.035051 ,
-6.7964664 , 16.865469 , -11.54324 , 7.681869 ,
0.44475392, 9.708182 , -8.932846 , 0.4123232 ,
-4.361452 , 1.3948607 , 9.511665 , 0.11667654,
2.9079323 , 6.049952 , 9.275183 , -18.078873 ,
6.2983274 , -0.7500531 , -2.725033 , -7.6027865 ,
3.3404543 , 2.990815 , 4.010979 , 11.000591 ,
-2.8873312 , 7.1352735 , -16.79663 , 18.495346 ,
-14.293832 , 7.89578 , 2.2714825 , 22.976387 ,
-4.875734 , -3.0836344 , -2.9999814 , 13.751918 ,
6.448228 , -11.924197 , 2.171869 , 2.0423572 ,
-6.173772 , 10.778437 , 25.77281 , -4.9495463 ,
14.57806 , 0.3044315 , 2.6132357 , -7.591999 ,
-2.076944 , 9.025118 , 1.7834753 , -3.1799617 ,
-4.9401326 , 23.465864 , 5.1685796 , -9.018578 ,
9.037825 , -4.4150195 , 6.859591 , -12.274467 ,
-0.88911164, 5.186309 , -3.9988663 , -13.638606 ,
-9.925445 , -0.06329413, -3.6709652 , -12.397416 ,
-12.719869 , -1.395601 , 2.1150916 , 5.7381287 ,
-4.4691963 , -3.82819 , -0.84233856, -1.1604277 ,
-13.490127 , 8.731719 , -20.778936 , -11.495662 ,
5.8033476 , -4.752041 , 10.833007 , -6.717991 ,
4.504732 , 13.4244375 , 1.1306485 , 7.3435574 ,
1.400918 , 14.704036 , -9.501399 , 7.2315617 ,
-6.417456 , 1.3333273 , 11.872697 , -0.30664724,
8.8845 , 6.5569253 , 4.7948146 , 0.03662816,
-8.704245 , 6.224871 , -3.2701402 , -11.508579 ],
dtype=float32)}
demo [ -5.749211 9.505463 -8.200284 -5.2075014 5.3940268
-3.04878 1.611095 10.127234 -10.534177 -15.821609
1.2032688 -0.35080156 1.2629458 -12.643498 -2.5758228
-11.343508 2.3385992 -8.719341 14.213509 15.404744
-0.39327756 6.338786 2.688887 8.7104025 17.469526
-8.77959 7.0576906 4.648855 -1.3089896 -23.294737
8.013747 13.891729 -9.926753 5.655307 -5.9422326
-22.842539 0.6293588 -18.46266 -10.811862 9.8192625
3.0070958 3.8072643 -2.3861165 3.0821571 -14.739942
1.7594414 -0.6485091 4.485623 2.0207152 7.264915
-6.40137 23.63524 2.9711294 -22.708025 9.93719
20.354511 -10.324688 -0.700492 -8.783211 -5.27593
15.999649 3.3004563 12.747926 15.429879 4.7849145
5.6699696 -2.3826702 10.605882 3.9112158 3.1500628
15.859915 -2.1832209 -23.908653 -6.4799504 -4.5365124
-9.224193 14.568347 -10.568833 4.982321 -4.342062
0.0914714 12.645902 -5.74285 -3.2141201 -2.7173362
-6.680575 0.4757669 -5.035051 -6.7964664 16.865469
-11.54324 7.681869 0.44475392 9.708182 -8.932846
0.4123232 -4.361452 1.3948607 9.511665 0.11667654
2.9079323 6.049952 9.275183 -18.078873 6.2983274
-0.7500531 -2.725033 -7.6027865 3.3404543 2.990815
4.010979 11.000591 -2.8873312 7.1352735 -16.79663
18.495346 -14.293832 7.89578 2.2714825 22.976387
-4.875734 -3.0836344 -2.9999814 13.751918 6.448228
-11.924197 2.171869 2.0423572 -6.173772 10.778437
25.77281 -4.9495463 14.57806 0.3044315 2.6132357
-7.591999 -2.076944 9.025118 1.7834753 -3.1799617
-4.9401326 23.465864 5.1685796 -9.018578 9.037825
-4.4150195 6.859591 -12.274467 -0.88911164 5.186309
-3.9988663 -13.638606 -9.925445 -0.06329413 -3.6709652
-12.397416 -12.719869 -1.395601 2.1150916 5.7381287
-4.4691963 -3.82819 -0.84233856 -1.1604277 -13.490127
8.731719 -20.778936 -11.495662 5.8033476 -4.752041
10.833007 -6.717991 4.504732 13.4244375 1.1306485
7.3435574 1.400918 14.704036 -9.501399 7.2315617
-6.417456 1.3333273 11.872697 -0.30664724 8.8845
6.5569253 4.7948146 0.03662816 -8.704245 6.224871
-3.2701402 -11.508579 ]
```
- Python API
@ -116,55 +106,45 @@ wget -c https://paddlespeech.bj.bcebos.com/vector/audio/85236145389.wav
输出:
```bash
# Vector Result:
{'dim': 192, 'embedding': array([ -5.749211 , 9.505463 , -8.200284 , -5.2075014 ,
5.3940268 , -3.04878 , 1.611095 , 10.127234 ,
-10.534177 , -15.821609 , 1.2032688 , -0.35080156,
1.2629458 , -12.643498 , -2.5758228 , -11.343508 ,
2.3385992 , -8.719341 , 14.213509 , 15.404744 ,
-0.39327756, 6.338786 , 2.688887 , 8.7104025 ,
17.469526 , -8.77959 , 7.0576906 , 4.648855 ,
-1.3089896 , -23.294737 , 8.013747 , 13.891729 ,
-9.926753 , 5.655307 , -5.9422326 , -22.842539 ,
0.6293588 , -18.46266 , -10.811862 , 9.8192625 ,
3.0070958 , 3.8072643 , -2.3861165 , 3.0821571 ,
-14.739942 , 1.7594414 , -0.6485091 , 4.485623 ,
2.0207152 , 7.264915 , -6.40137 , 23.63524 ,
2.9711294 , -22.708025 , 9.93719 , 20.354511 ,
-10.324688 , -0.700492 , -8.783211 , -5.27593 ,
15.999649 , 3.3004563 , 12.747926 , 15.429879 ,
4.7849145 , 5.6699696 , -2.3826702 , 10.605882 ,
3.9112158 , 3.1500628 , 15.859915 , -2.1832209 ,
-23.908653 , -6.4799504 , -4.5365124 , -9.224193 ,
14.568347 , -10.568833 , 4.982321 , -4.342062 ,
0.0914714 , 12.645902 , -5.74285 , -3.2141201 ,
-2.7173362 , -6.680575 , 0.4757669 , -5.035051 ,
-6.7964664 , 16.865469 , -11.54324 , 7.681869 ,
0.44475392, 9.708182 , -8.932846 , 0.4123232 ,
-4.361452 , 1.3948607 , 9.511665 , 0.11667654,
2.9079323 , 6.049952 , 9.275183 , -18.078873 ,
6.2983274 , -0.7500531 , -2.725033 , -7.6027865 ,
3.3404543 , 2.990815 , 4.010979 , 11.000591 ,
-2.8873312 , 7.1352735 , -16.79663 , 18.495346 ,
-14.293832 , 7.89578 , 2.2714825 , 22.976387 ,
-4.875734 , -3.0836344 , -2.9999814 , 13.751918 ,
6.448228 , -11.924197 , 2.171869 , 2.0423572 ,
-6.173772 , 10.778437 , 25.77281 , -4.9495463 ,
14.57806 , 0.3044315 , 2.6132357 , -7.591999 ,
-2.076944 , 9.025118 , 1.7834753 , -3.1799617 ,
-4.9401326 , 23.465864 , 5.1685796 , -9.018578 ,
9.037825 , -4.4150195 , 6.859591 , -12.274467 ,
-0.88911164, 5.186309 , -3.9988663 , -13.638606 ,
-9.925445 , -0.06329413, -3.6709652 , -12.397416 ,
-12.719869 , -1.395601 , 2.1150916 , 5.7381287 ,
-4.4691963 , -3.82819 , -0.84233856, -1.1604277 ,
-13.490127 , 8.731719 , -20.778936 , -11.495662 ,
5.8033476 , -4.752041 , 10.833007 , -6.717991 ,
4.504732 , 13.4244375 , 1.1306485 , 7.3435574 ,
1.400918 , 14.704036 , -9.501399 , 7.2315617 ,
-6.417456 , 1.3333273 , 11.872697 , -0.30664724,
8.8845 , 6.5569253 , 4.7948146 , 0.03662816,
-8.704245 , 6.224871 , -3.2701402 , -11.508579 ],
dtype=float32)}
[ -5.749211 9.505463 -8.200284 -5.2075014 5.3940268
-3.04878 1.611095 10.127234 -10.534177 -15.821609
1.2032688 -0.35080156 1.2629458 -12.643498 -2.5758228
-11.343508 2.3385992 -8.719341 14.213509 15.404744
-0.39327756 6.338786 2.688887 8.7104025 17.469526
-8.77959 7.0576906 4.648855 -1.3089896 -23.294737
8.013747 13.891729 -9.926753 5.655307 -5.9422326
-22.842539 0.6293588 -18.46266 -10.811862 9.8192625
3.0070958 3.8072643 -2.3861165 3.0821571 -14.739942
1.7594414 -0.6485091 4.485623 2.0207152 7.264915
-6.40137 23.63524 2.9711294 -22.708025 9.93719
20.354511 -10.324688 -0.700492 -8.783211 -5.27593
15.999649 3.3004563 12.747926 15.429879 4.7849145
5.6699696 -2.3826702 10.605882 3.9112158 3.1500628
15.859915 -2.1832209 -23.908653 -6.4799504 -4.5365124
-9.224193 14.568347 -10.568833 4.982321 -4.342062
0.0914714 12.645902 -5.74285 -3.2141201 -2.7173362
-6.680575 0.4757669 -5.035051 -6.7964664 16.865469
-11.54324 7.681869 0.44475392 9.708182 -8.932846
0.4123232 -4.361452 1.3948607 9.511665 0.11667654
2.9079323 6.049952 9.275183 -18.078873 6.2983274
-0.7500531 -2.725033 -7.6027865 3.3404543 2.990815
4.010979 11.000591 -2.8873312 7.1352735 -16.79663
18.495346 -14.293832 7.89578 2.2714825 22.976387
-4.875734 -3.0836344 -2.9999814 13.751918 6.448228
-11.924197 2.171869 2.0423572 -6.173772 10.778437
25.77281 -4.9495463 14.57806 0.3044315 2.6132357
-7.591999 -2.076944 9.025118 1.7834753 -3.1799617
-4.9401326 23.465864 5.1685796 -9.018578 9.037825
-4.4150195 6.859591 -12.274467 -0.88911164 5.186309
-3.9988663 -13.638606 -9.925445 -0.06329413 -3.6709652
-12.397416 -12.719869 -1.395601 2.1150916 5.7381287
-4.4691963 -3.82819 -0.84233856 -1.1604277 -13.490127
8.731719 -20.778936 -11.495662 5.8033476 -4.752041
10.833007 -6.717991 4.504732 13.4244375 1.1306485
7.3435574 1.400918 14.704036 -9.501399 7.2315617
-6.417456 1.3333273 11.872697 -0.30664724 8.8845
6.5569253 4.7948146 0.03662816 -8.704245 6.224871
-3.2701402 -11.508579 ]
```
### 4.预训练模型

@ -48,9 +48,3 @@ You can do the conversion using ffmpeg https://gist.github.com/seungwonpark/4f2
|VoxCeleb1-H(cleaned) |list_test_hard2.txt | 550894 | 275488 | 275406 |
|VoxCeleb1-E | list_test_all.txt | 581480 | 290743 | 290737 |
|VoxCeleb1-E(cleaned) | list_test_all2.txt |579818 |289921 |289897 |

@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import paddle
from yacs.config import CfgNode

@ -13,7 +13,7 @@
# limitations under the License.
from .esc50 import ESC50
from .gtzan import GTZAN
from .rirs_noises import OpenRIRNoise
from .tess import TESS
from .urban_sound import UrbanSound8K
from .voxceleb import VoxCeleb
from .rirs_noises import OpenRIRNoise

@ -13,12 +13,9 @@
# limitations under the License.
import collections
import csv
import glob
import os
import random
from typing import Dict
from typing import List
from typing import Tuple
from paddle.io import Dataset
from tqdm import tqdm
@ -26,7 +23,6 @@ from tqdm import tqdm
from ..backends import load as load_audio
from ..backends import save as save_wav
from ..utils import DATA_HOME
from ..utils import decompress
from ..utils.download import download_and_decompress
from .dataset import feat_funcs

@ -17,9 +17,7 @@ import glob
import os
import random
from multiprocessing import cpu_count
from typing import Dict
from typing import List
from typing import Tuple
from paddle.io import Dataset
from pathos.multiprocessing import Pool
@ -135,7 +133,7 @@ class VoxCeleb(Dataset):
# so, we check the vox1/wav dir status
print(f"wav base path: {self.wav_path}")
if not os.path.isdir(self.wav_path):
print(f"start to download the voxceleb1 dataset")
print("start to download the voxceleb1 dataset")
download_and_decompress( # multi-zip parts concatenate to vox1_dev_wav.zip
self.archieves_audio_dev,
self.base_path,

@ -82,7 +82,10 @@ class VectorExecutor(BaseExecutor):
choices=["spk"],
help="task type in vector domain")
self.parser.add_argument(
"--input", type=str, default=None, help="Audio file to extract embedding.")
"--input",
type=str,
default=None,
help="Audio file to extract embedding.")
self.parser.add_argument(
"--sample_rate",
type=int,
@ -344,8 +347,7 @@ class VectorExecutor(BaseExecutor):
Union[str, os.PathLike]: audio embedding info
"""
embedding = self._outputs["embedding"]
dim = embedding.shape[0]
return {"dim": dim, "embedding": embedding}
return embedding
def preprocess(self, model_type: str, input_file: Union[str, os.PathLike]):
"""Extract the audio feat

@ -12,12 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import ast
import os
import numpy as np
import paddle
import paddle.nn.functional as F
from paddle.io import BatchSampler
from paddle.io import DataLoader
from tqdm import tqdm

@ -14,7 +14,6 @@
# this is modified from SpeechBrain
# https://github.com/speechbrain/speechbrain/blob/085be635c07f16d42cd1295045bc46c407f1e15b/speechbrain/lobes/augment.py
import math
import os
from typing import List
import numpy as np
@ -22,7 +21,6 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddleaudio import load as load_audio
from paddleaudio.datasets.rirs_noises import OpenRIRNoise
from paddlespeech.s2t.utils.log import Log
from paddlespeech.vector.io.signal_processing import compute_amplitude

@ -11,8 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import numpy as np
import paddle

@ -1,3 +1,18 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
def pytest_addoption(parser):
parser.addoption("--device", action="store", default="cpu")
@ -8,4 +23,3 @@ def pytest_generate_tests(metafunc):
option_value = metafunc.config.option.device
if "device" in metafunc.fixturenames and option_value is not None:
metafunc.parametrize("device", [option_value])

@ -11,15 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.io import BatchSampler
from paddle.io import DataLoader
from paddle.io import Dataset
def test_add_noise(tmpdir, device):

Loading…
Cancel
Save