diff --git a/dataset/voxceleb/voxceleb1.py b/dataset/voxceleb/voxceleb1.py index d0978d9d..90586200 100644 --- a/dataset/voxceleb/voxceleb1.py +++ b/dataset/voxceleb/voxceleb1.py @@ -63,13 +63,15 @@ TEST_TARGET_DATA = "vox1_test_wav.zip vox1_test_wav.zip 185fdc63c3c739954633d503 TRIAL_BASE_URL = "https://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta/" TRIAL_LIST = { - "veri_test.txt": "29fc7cc1c5d59f0816dc15d6e8be60f7", # voxceleb1 - "veri_test2.txt": "b73110731c9223c1461fe49cb48dddfc", # voxceleb1(cleaned) - "list_test_hard.txt": "21c341b6b2168eea2634df0fb4b8fff1", # voxceleb1-H - "list_test_hard2.txt": "857790e09d579a68eb2e339a090343c8", # voxceleb1-H(cleaned) - "list_test_all.txt": "b9ecf7aa49d4b656aa927a8092844e4a", # voxceleb1-E - "list_test_all2.txt": "a53e059deb562ffcfc092bf5d90d9f3a" # voxceleb1-E(cleaned) - } + "veri_test.txt": "29fc7cc1c5d59f0816dc15d6e8be60f7", # voxceleb1 + "veri_test2.txt": "b73110731c9223c1461fe49cb48dddfc", # voxceleb1(cleaned) + "list_test_hard.txt": "21c341b6b2168eea2634df0fb4b8fff1", # voxceleb1-H + "list_test_hard2.txt": + "857790e09d579a68eb2e339a090343c8", # voxceleb1-H(cleaned) + "list_test_all.txt": "b9ecf7aa49d4b656aa927a8092844e4a", # voxceleb1-E + "list_test_all2.txt": + "a53e059deb562ffcfc092bf5d90d9f3a" # voxceleb1-E(cleaned) +} parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( @@ -176,6 +178,7 @@ def prepare_dataset(base_url, data_list, target_dir, manifest_path, # create the manifest file create_manifest(data_dir=target_dir, manifest_path_prefix=manifest_path) + def prepare_trial(base_url, data_list, target_dir): if not os.path.exists(target_dir): os.makedirs(target_dir) @@ -185,10 +188,12 @@ def prepare_trial(base_url, data_list, target_dir): if not os.path.exists(os.path.join(target_dir, trial)): download_url = " --no-check-certificate " + base_url + "/" + trial download(url=download_url, md5sum=md5sum, target_dir=target_dir) + + def main(): if args.target_dir.startswith('~'): args.target_dir = os.path.expanduser(args.target_dir) - + # prepare the vox1 dev data prepare_dataset( base_url=BASE_URL, @@ -209,8 +214,7 @@ def main(): prepare_trial( base_url=TRIAL_BASE_URL, data_list=TRIAL_LIST, - target_dir=os.path.dirname(args.manifest_prefix) - ) + target_dir=os.path.dirname(args.manifest_prefix)) print("Manifest prepare done!") diff --git a/dataset/voxceleb/voxceleb2.py b/dataset/voxceleb/voxceleb2.py index ef7bb230..22a2e2ff 100644 --- a/dataset/voxceleb/voxceleb2.py +++ b/dataset/voxceleb/voxceleb2.py @@ -22,12 +22,10 @@ import codecs import glob import json import os -import subprocess from pathlib import Path import soundfile -from utils.utility import check_md5sum from utils.utility import download from utils.utility import unzip @@ -40,9 +38,8 @@ BASE_URL = "--no-check-certificate https://www.robots.ox.ac.uk/~vgg/data/voxcele DEV_DATA_URL = BASE_URL + '/vox2_aac.zip' DEV_MD5SUM = "bbc063c46078a602ca71605645c2a402" - # test data -TEST_DATA_URL = BASE_URL + '/vox2_test_aac.zip' +TEST_DATA_URL = BASE_URL + '/vox2_test_aac.zip' TEST_MD5SUM = "0d2b3ea430a821c33263b5ea37ede312" parser = argparse.ArgumentParser(description=__doc__) @@ -56,14 +53,16 @@ parser.add_argument( default="manifest", type=str, help="Filepath prefix for output manifests. (default: %(default)s)") -parser.add_argument("--download", - default=False, - action="store_true", - help="Download the voxceleb2 dataset. (default: %(default)s)") -parser.add_argument("--generate", - default=False, - action="store_true", - help="Generate the manifest files. (default: %(default)s)") +parser.add_argument( + "--download", + default=False, + action="store_true", + help="Download the voxceleb2 dataset. (default: %(default)s)") +parser.add_argument( + "--generate", + default=False, + action="store_true", + help="Generate the manifest files. (default: %(default)s)") args = parser.parse_args() @@ -138,7 +137,7 @@ def download_dataset(url, md5sum, target_dir, dataset): def main(): if args.target_dir.startswith('~'): args.target_dir = os.path.expanduser(args.target_dir) - + # download and unpack the vox2-dev data print("download: {}".format(args.download)) if args.download: @@ -157,7 +156,9 @@ def main(): print("VoxCeleb2 download is done!") if args.generate: - create_manifest(args.target_dir, manifest_path_prefix=args.manifest_prefix) + create_manifest( + args.target_dir, manifest_path_prefix=args.manifest_prefix) + if __name__ == '__main__': main() diff --git a/demos/audio_searching/src/operations/load.py b/demos/audio_searching/src/operations/load.py index 80b6375f..7a295bf3 100644 --- a/demos/audio_searching/src/operations/load.py +++ b/demos/audio_searching/src/operations/load.py @@ -26,8 +26,9 @@ def get_audios(path): """ supported_formats = [".wav", ".mp3", ".ogg", ".flac", ".m4a"] return [ - item for sublist in [[os.path.join(dir, file) for file in files] - for dir, _, files in list(os.walk(path))] + item + for sublist in [[os.path.join(dir, file) for file in files] + for dir, _, files in list(os.walk(path))] for item in sublist if os.path.splitext(item)[1] in supported_formats ] diff --git a/demos/speaker_verification/README.md b/demos/speaker_verification/README.md index c4d10ccf..8739d402 100644 --- a/demos/speaker_verification/README.md +++ b/demos/speaker_verification/README.md @@ -46,56 +46,46 @@ wget -c https://paddlespeech.bj.bcebos.com/vector/audio/85236145389.wav Output: -```bash - demo {'dim': 192, 'embedding': array([ -5.749211 , 9.505463 , -8.200284 , -5.2075014 , - 5.3940268 , -3.04878 , 1.611095 , 10.127234 , - -10.534177 , -15.821609 , 1.2032688 , -0.35080156, - 1.2629458 , -12.643498 , -2.5758228 , -11.343508 , - 2.3385992 , -8.719341 , 14.213509 , 15.404744 , - -0.39327756, 6.338786 , 2.688887 , 8.7104025 , - 17.469526 , -8.77959 , 7.0576906 , 4.648855 , - -1.3089896 , -23.294737 , 8.013747 , 13.891729 , - -9.926753 , 5.655307 , -5.9422326 , -22.842539 , - 0.6293588 , -18.46266 , -10.811862 , 9.8192625 , - 3.0070958 , 3.8072643 , -2.3861165 , 3.0821571 , - -14.739942 , 1.7594414 , -0.6485091 , 4.485623 , - 2.0207152 , 7.264915 , -6.40137 , 23.63524 , - 2.9711294 , -22.708025 , 9.93719 , 20.354511 , - -10.324688 , -0.700492 , -8.783211 , -5.27593 , - 15.999649 , 3.3004563 , 12.747926 , 15.429879 , - 4.7849145 , 5.6699696 , -2.3826702 , 10.605882 , - 3.9112158 , 3.1500628 , 15.859915 , -2.1832209 , - -23.908653 , -6.4799504 , -4.5365124 , -9.224193 , - 14.568347 , -10.568833 , 4.982321 , -4.342062 , - 0.0914714 , 12.645902 , -5.74285 , -3.2141201 , - -2.7173362 , -6.680575 , 0.4757669 , -5.035051 , - -6.7964664 , 16.865469 , -11.54324 , 7.681869 , - 0.44475392, 9.708182 , -8.932846 , 0.4123232 , - -4.361452 , 1.3948607 , 9.511665 , 0.11667654, - 2.9079323 , 6.049952 , 9.275183 , -18.078873 , - 6.2983274 , -0.7500531 , -2.725033 , -7.6027865 , - 3.3404543 , 2.990815 , 4.010979 , 11.000591 , - -2.8873312 , 7.1352735 , -16.79663 , 18.495346 , - -14.293832 , 7.89578 , 2.2714825 , 22.976387 , - -4.875734 , -3.0836344 , -2.9999814 , 13.751918 , - 6.448228 , -11.924197 , 2.171869 , 2.0423572 , - -6.173772 , 10.778437 , 25.77281 , -4.9495463 , - 14.57806 , 0.3044315 , 2.6132357 , -7.591999 , - -2.076944 , 9.025118 , 1.7834753 , -3.1799617 , - -4.9401326 , 23.465864 , 5.1685796 , -9.018578 , - 9.037825 , -4.4150195 , 6.859591 , -12.274467 , - -0.88911164, 5.186309 , -3.9988663 , -13.638606 , - -9.925445 , -0.06329413, -3.6709652 , -12.397416 , - -12.719869 , -1.395601 , 2.1150916 , 5.7381287 , - -4.4691963 , -3.82819 , -0.84233856, -1.1604277 , - -13.490127 , 8.731719 , -20.778936 , -11.495662 , - 5.8033476 , -4.752041 , 10.833007 , -6.717991 , - 4.504732 , 13.4244375 , 1.1306485 , 7.3435574 , - 1.400918 , 14.704036 , -9.501399 , 7.2315617 , - -6.417456 , 1.3333273 , 11.872697 , -0.30664724, - 8.8845 , 6.5569253 , 4.7948146 , 0.03662816, - -8.704245 , 6.224871 , -3.2701402 , -11.508579 ], - dtype=float32)} + ```bash + demo [ -5.749211 9.505463 -8.200284 -5.2075014 5.3940268 + -3.04878 1.611095 10.127234 -10.534177 -15.821609 + 1.2032688 -0.35080156 1.2629458 -12.643498 -2.5758228 + -11.343508 2.3385992 -8.719341 14.213509 15.404744 + -0.39327756 6.338786 2.688887 8.7104025 17.469526 + -8.77959 7.0576906 4.648855 -1.3089896 -23.294737 + 8.013747 13.891729 -9.926753 5.655307 -5.9422326 + -22.842539 0.6293588 -18.46266 -10.811862 9.8192625 + 3.0070958 3.8072643 -2.3861165 3.0821571 -14.739942 + 1.7594414 -0.6485091 4.485623 2.0207152 7.264915 + -6.40137 23.63524 2.9711294 -22.708025 9.93719 + 20.354511 -10.324688 -0.700492 -8.783211 -5.27593 + 15.999649 3.3004563 12.747926 15.429879 4.7849145 + 5.6699696 -2.3826702 10.605882 3.9112158 3.1500628 + 15.859915 -2.1832209 -23.908653 -6.4799504 -4.5365124 + -9.224193 14.568347 -10.568833 4.982321 -4.342062 + 0.0914714 12.645902 -5.74285 -3.2141201 -2.7173362 + -6.680575 0.4757669 -5.035051 -6.7964664 16.865469 + -11.54324 7.681869 0.44475392 9.708182 -8.932846 + 0.4123232 -4.361452 1.3948607 9.511665 0.11667654 + 2.9079323 6.049952 9.275183 -18.078873 6.2983274 + -0.7500531 -2.725033 -7.6027865 3.3404543 2.990815 + 4.010979 11.000591 -2.8873312 7.1352735 -16.79663 + 18.495346 -14.293832 7.89578 2.2714825 22.976387 + -4.875734 -3.0836344 -2.9999814 13.751918 6.448228 + -11.924197 2.171869 2.0423572 -6.173772 10.778437 + 25.77281 -4.9495463 14.57806 0.3044315 2.6132357 + -7.591999 -2.076944 9.025118 1.7834753 -3.1799617 + -4.9401326 23.465864 5.1685796 -9.018578 9.037825 + -4.4150195 6.859591 -12.274467 -0.88911164 5.186309 + -3.9988663 -13.638606 -9.925445 -0.06329413 -3.6709652 + -12.397416 -12.719869 -1.395601 2.1150916 5.7381287 + -4.4691963 -3.82819 -0.84233856 -1.1604277 -13.490127 + 8.731719 -20.778936 -11.495662 5.8033476 -4.752041 + 10.833007 -6.717991 4.504732 13.4244375 1.1306485 + 7.3435574 1.400918 14.704036 -9.501399 7.2315617 + -6.417456 1.3333273 11.872697 -0.30664724 8.8845 + 6.5569253 4.7948146 0.03662816 -8.704245 6.224871 + -3.2701402 -11.508579 ] ``` - Python API @@ -118,55 +108,45 @@ wget -c https://paddlespeech.bj.bcebos.com/vector/audio/85236145389.wav Output: ```bash # Vector Result: - {'dim': 192, 'embedding': array([ -5.749211 , 9.505463 , -8.200284 , -5.2075014 , - 5.3940268 , -3.04878 , 1.611095 , 10.127234 , - -10.534177 , -15.821609 , 1.2032688 , -0.35080156, - 1.2629458 , -12.643498 , -2.5758228 , -11.343508 , - 2.3385992 , -8.719341 , 14.213509 , 15.404744 , - -0.39327756, 6.338786 , 2.688887 , 8.7104025 , - 17.469526 , -8.77959 , 7.0576906 , 4.648855 , - -1.3089896 , -23.294737 , 8.013747 , 13.891729 , - -9.926753 , 5.655307 , -5.9422326 , -22.842539 , - 0.6293588 , -18.46266 , -10.811862 , 9.8192625 , - 3.0070958 , 3.8072643 , -2.3861165 , 3.0821571 , - -14.739942 , 1.7594414 , -0.6485091 , 4.485623 , - 2.0207152 , 7.264915 , -6.40137 , 23.63524 , - 2.9711294 , -22.708025 , 9.93719 , 20.354511 , - -10.324688 , -0.700492 , -8.783211 , -5.27593 , - 15.999649 , 3.3004563 , 12.747926 , 15.429879 , - 4.7849145 , 5.6699696 , -2.3826702 , 10.605882 , - 3.9112158 , 3.1500628 , 15.859915 , -2.1832209 , - -23.908653 , -6.4799504 , -4.5365124 , -9.224193 , - 14.568347 , -10.568833 , 4.982321 , -4.342062 , - 0.0914714 , 12.645902 , -5.74285 , -3.2141201 , - -2.7173362 , -6.680575 , 0.4757669 , -5.035051 , - -6.7964664 , 16.865469 , -11.54324 , 7.681869 , - 0.44475392, 9.708182 , -8.932846 , 0.4123232 , - -4.361452 , 1.3948607 , 9.511665 , 0.11667654, - 2.9079323 , 6.049952 , 9.275183 , -18.078873 , - 6.2983274 , -0.7500531 , -2.725033 , -7.6027865 , - 3.3404543 , 2.990815 , 4.010979 , 11.000591 , - -2.8873312 , 7.1352735 , -16.79663 , 18.495346 , - -14.293832 , 7.89578 , 2.2714825 , 22.976387 , - -4.875734 , -3.0836344 , -2.9999814 , 13.751918 , - 6.448228 , -11.924197 , 2.171869 , 2.0423572 , - -6.173772 , 10.778437 , 25.77281 , -4.9495463 , - 14.57806 , 0.3044315 , 2.6132357 , -7.591999 , - -2.076944 , 9.025118 , 1.7834753 , -3.1799617 , - -4.9401326 , 23.465864 , 5.1685796 , -9.018578 , - 9.037825 , -4.4150195 , 6.859591 , -12.274467 , - -0.88911164, 5.186309 , -3.9988663 , -13.638606 , - -9.925445 , -0.06329413, -3.6709652 , -12.397416 , - -12.719869 , -1.395601 , 2.1150916 , 5.7381287 , - -4.4691963 , -3.82819 , -0.84233856, -1.1604277 , - -13.490127 , 8.731719 , -20.778936 , -11.495662 , - 5.8033476 , -4.752041 , 10.833007 , -6.717991 , - 4.504732 , 13.4244375 , 1.1306485 , 7.3435574 , - 1.400918 , 14.704036 , -9.501399 , 7.2315617 , - -6.417456 , 1.3333273 , 11.872697 , -0.30664724, - 8.8845 , 6.5569253 , 4.7948146 , 0.03662816, - -8.704245 , 6.224871 , -3.2701402 , -11.508579 ], - dtype=float32)} + [ -5.749211 9.505463 -8.200284 -5.2075014 5.3940268 + -3.04878 1.611095 10.127234 -10.534177 -15.821609 + 1.2032688 -0.35080156 1.2629458 -12.643498 -2.5758228 + -11.343508 2.3385992 -8.719341 14.213509 15.404744 + -0.39327756 6.338786 2.688887 8.7104025 17.469526 + -8.77959 7.0576906 4.648855 -1.3089896 -23.294737 + 8.013747 13.891729 -9.926753 5.655307 -5.9422326 + -22.842539 0.6293588 -18.46266 -10.811862 9.8192625 + 3.0070958 3.8072643 -2.3861165 3.0821571 -14.739942 + 1.7594414 -0.6485091 4.485623 2.0207152 7.264915 + -6.40137 23.63524 2.9711294 -22.708025 9.93719 + 20.354511 -10.324688 -0.700492 -8.783211 -5.27593 + 15.999649 3.3004563 12.747926 15.429879 4.7849145 + 5.6699696 -2.3826702 10.605882 3.9112158 3.1500628 + 15.859915 -2.1832209 -23.908653 -6.4799504 -4.5365124 + -9.224193 14.568347 -10.568833 4.982321 -4.342062 + 0.0914714 12.645902 -5.74285 -3.2141201 -2.7173362 + -6.680575 0.4757669 -5.035051 -6.7964664 16.865469 + -11.54324 7.681869 0.44475392 9.708182 -8.932846 + 0.4123232 -4.361452 1.3948607 9.511665 0.11667654 + 2.9079323 6.049952 9.275183 -18.078873 6.2983274 + -0.7500531 -2.725033 -7.6027865 3.3404543 2.990815 + 4.010979 11.000591 -2.8873312 7.1352735 -16.79663 + 18.495346 -14.293832 7.89578 2.2714825 22.976387 + -4.875734 -3.0836344 -2.9999814 13.751918 6.448228 + -11.924197 2.171869 2.0423572 -6.173772 10.778437 + 25.77281 -4.9495463 14.57806 0.3044315 2.6132357 + -7.591999 -2.076944 9.025118 1.7834753 -3.1799617 + -4.9401326 23.465864 5.1685796 -9.018578 9.037825 + -4.4150195 6.859591 -12.274467 -0.88911164 5.186309 + -3.9988663 -13.638606 -9.925445 -0.06329413 -3.6709652 + -12.397416 -12.719869 -1.395601 2.1150916 5.7381287 + -4.4691963 -3.82819 -0.84233856 -1.1604277 -13.490127 + 8.731719 -20.778936 -11.495662 5.8033476 -4.752041 + 10.833007 -6.717991 4.504732 13.4244375 1.1306485 + 7.3435574 1.400918 14.704036 -9.501399 7.2315617 + -6.417456 1.3333273 11.872697 -0.30664724 8.8845 + 6.5569253 4.7948146 0.03662816 -8.704245 6.224871 + -3.2701402 -11.508579 ] ``` ### 4.Pretrained Models diff --git a/demos/speaker_verification/README_cn.md b/demos/speaker_verification/README_cn.md index e2799b75..fe8949b3 100644 --- a/demos/speaker_verification/README_cn.md +++ b/demos/speaker_verification/README_cn.md @@ -45,55 +45,45 @@ wget -c https://paddlespeech.bj.bcebos.com/vector/audio/85236145389.wav 输出: ```bash - demo {'dim': 192, 'embedding': array([ -5.749211 , 9.505463 , -8.200284 , -5.2075014 , - 5.3940268 , -3.04878 , 1.611095 , 10.127234 , - -10.534177 , -15.821609 , 1.2032688 , -0.35080156, - 1.2629458 , -12.643498 , -2.5758228 , -11.343508 , - 2.3385992 , -8.719341 , 14.213509 , 15.404744 , - -0.39327756, 6.338786 , 2.688887 , 8.7104025 , - 17.469526 , -8.77959 , 7.0576906 , 4.648855 , - -1.3089896 , -23.294737 , 8.013747 , 13.891729 , - -9.926753 , 5.655307 , -5.9422326 , -22.842539 , - 0.6293588 , -18.46266 , -10.811862 , 9.8192625 , - 3.0070958 , 3.8072643 , -2.3861165 , 3.0821571 , - -14.739942 , 1.7594414 , -0.6485091 , 4.485623 , - 2.0207152 , 7.264915 , -6.40137 , 23.63524 , - 2.9711294 , -22.708025 , 9.93719 , 20.354511 , - -10.324688 , -0.700492 , -8.783211 , -5.27593 , - 15.999649 , 3.3004563 , 12.747926 , 15.429879 , - 4.7849145 , 5.6699696 , -2.3826702 , 10.605882 , - 3.9112158 , 3.1500628 , 15.859915 , -2.1832209 , - -23.908653 , -6.4799504 , -4.5365124 , -9.224193 , - 14.568347 , -10.568833 , 4.982321 , -4.342062 , - 0.0914714 , 12.645902 , -5.74285 , -3.2141201 , - -2.7173362 , -6.680575 , 0.4757669 , -5.035051 , - -6.7964664 , 16.865469 , -11.54324 , 7.681869 , - 0.44475392, 9.708182 , -8.932846 , 0.4123232 , - -4.361452 , 1.3948607 , 9.511665 , 0.11667654, - 2.9079323 , 6.049952 , 9.275183 , -18.078873 , - 6.2983274 , -0.7500531 , -2.725033 , -7.6027865 , - 3.3404543 , 2.990815 , 4.010979 , 11.000591 , - -2.8873312 , 7.1352735 , -16.79663 , 18.495346 , - -14.293832 , 7.89578 , 2.2714825 , 22.976387 , - -4.875734 , -3.0836344 , -2.9999814 , 13.751918 , - 6.448228 , -11.924197 , 2.171869 , 2.0423572 , - -6.173772 , 10.778437 , 25.77281 , -4.9495463 , - 14.57806 , 0.3044315 , 2.6132357 , -7.591999 , - -2.076944 , 9.025118 , 1.7834753 , -3.1799617 , - -4.9401326 , 23.465864 , 5.1685796 , -9.018578 , - 9.037825 , -4.4150195 , 6.859591 , -12.274467 , - -0.88911164, 5.186309 , -3.9988663 , -13.638606 , - -9.925445 , -0.06329413, -3.6709652 , -12.397416 , - -12.719869 , -1.395601 , 2.1150916 , 5.7381287 , - -4.4691963 , -3.82819 , -0.84233856, -1.1604277 , - -13.490127 , 8.731719 , -20.778936 , -11.495662 , - 5.8033476 , -4.752041 , 10.833007 , -6.717991 , - 4.504732 , 13.4244375 , 1.1306485 , 7.3435574 , - 1.400918 , 14.704036 , -9.501399 , 7.2315617 , - -6.417456 , 1.3333273 , 11.872697 , -0.30664724, - 8.8845 , 6.5569253 , 4.7948146 , 0.03662816, - -8.704245 , 6.224871 , -3.2701402 , -11.508579 ], - dtype=float32)} + demo [ -5.749211 9.505463 -8.200284 -5.2075014 5.3940268 + -3.04878 1.611095 10.127234 -10.534177 -15.821609 + 1.2032688 -0.35080156 1.2629458 -12.643498 -2.5758228 + -11.343508 2.3385992 -8.719341 14.213509 15.404744 + -0.39327756 6.338786 2.688887 8.7104025 17.469526 + -8.77959 7.0576906 4.648855 -1.3089896 -23.294737 + 8.013747 13.891729 -9.926753 5.655307 -5.9422326 + -22.842539 0.6293588 -18.46266 -10.811862 9.8192625 + 3.0070958 3.8072643 -2.3861165 3.0821571 -14.739942 + 1.7594414 -0.6485091 4.485623 2.0207152 7.264915 + -6.40137 23.63524 2.9711294 -22.708025 9.93719 + 20.354511 -10.324688 -0.700492 -8.783211 -5.27593 + 15.999649 3.3004563 12.747926 15.429879 4.7849145 + 5.6699696 -2.3826702 10.605882 3.9112158 3.1500628 + 15.859915 -2.1832209 -23.908653 -6.4799504 -4.5365124 + -9.224193 14.568347 -10.568833 4.982321 -4.342062 + 0.0914714 12.645902 -5.74285 -3.2141201 -2.7173362 + -6.680575 0.4757669 -5.035051 -6.7964664 16.865469 + -11.54324 7.681869 0.44475392 9.708182 -8.932846 + 0.4123232 -4.361452 1.3948607 9.511665 0.11667654 + 2.9079323 6.049952 9.275183 -18.078873 6.2983274 + -0.7500531 -2.725033 -7.6027865 3.3404543 2.990815 + 4.010979 11.000591 -2.8873312 7.1352735 -16.79663 + 18.495346 -14.293832 7.89578 2.2714825 22.976387 + -4.875734 -3.0836344 -2.9999814 13.751918 6.448228 + -11.924197 2.171869 2.0423572 -6.173772 10.778437 + 25.77281 -4.9495463 14.57806 0.3044315 2.6132357 + -7.591999 -2.076944 9.025118 1.7834753 -3.1799617 + -4.9401326 23.465864 5.1685796 -9.018578 9.037825 + -4.4150195 6.859591 -12.274467 -0.88911164 5.186309 + -3.9988663 -13.638606 -9.925445 -0.06329413 -3.6709652 + -12.397416 -12.719869 -1.395601 2.1150916 5.7381287 + -4.4691963 -3.82819 -0.84233856 -1.1604277 -13.490127 + 8.731719 -20.778936 -11.495662 5.8033476 -4.752041 + 10.833007 -6.717991 4.504732 13.4244375 1.1306485 + 7.3435574 1.400918 14.704036 -9.501399 7.2315617 + -6.417456 1.3333273 11.872697 -0.30664724 8.8845 + 6.5569253 4.7948146 0.03662816 -8.704245 6.224871 + -3.2701402 -11.508579 ] ``` - Python API @@ -116,55 +106,45 @@ wget -c https://paddlespeech.bj.bcebos.com/vector/audio/85236145389.wav 输出: ```bash # Vector Result: - {'dim': 192, 'embedding': array([ -5.749211 , 9.505463 , -8.200284 , -5.2075014 , - 5.3940268 , -3.04878 , 1.611095 , 10.127234 , - -10.534177 , -15.821609 , 1.2032688 , -0.35080156, - 1.2629458 , -12.643498 , -2.5758228 , -11.343508 , - 2.3385992 , -8.719341 , 14.213509 , 15.404744 , - -0.39327756, 6.338786 , 2.688887 , 8.7104025 , - 17.469526 , -8.77959 , 7.0576906 , 4.648855 , - -1.3089896 , -23.294737 , 8.013747 , 13.891729 , - -9.926753 , 5.655307 , -5.9422326 , -22.842539 , - 0.6293588 , -18.46266 , -10.811862 , 9.8192625 , - 3.0070958 , 3.8072643 , -2.3861165 , 3.0821571 , - -14.739942 , 1.7594414 , -0.6485091 , 4.485623 , - 2.0207152 , 7.264915 , -6.40137 , 23.63524 , - 2.9711294 , -22.708025 , 9.93719 , 20.354511 , - -10.324688 , -0.700492 , -8.783211 , -5.27593 , - 15.999649 , 3.3004563 , 12.747926 , 15.429879 , - 4.7849145 , 5.6699696 , -2.3826702 , 10.605882 , - 3.9112158 , 3.1500628 , 15.859915 , -2.1832209 , - -23.908653 , -6.4799504 , -4.5365124 , -9.224193 , - 14.568347 , -10.568833 , 4.982321 , -4.342062 , - 0.0914714 , 12.645902 , -5.74285 , -3.2141201 , - -2.7173362 , -6.680575 , 0.4757669 , -5.035051 , - -6.7964664 , 16.865469 , -11.54324 , 7.681869 , - 0.44475392, 9.708182 , -8.932846 , 0.4123232 , - -4.361452 , 1.3948607 , 9.511665 , 0.11667654, - 2.9079323 , 6.049952 , 9.275183 , -18.078873 , - 6.2983274 , -0.7500531 , -2.725033 , -7.6027865 , - 3.3404543 , 2.990815 , 4.010979 , 11.000591 , - -2.8873312 , 7.1352735 , -16.79663 , 18.495346 , - -14.293832 , 7.89578 , 2.2714825 , 22.976387 , - -4.875734 , -3.0836344 , -2.9999814 , 13.751918 , - 6.448228 , -11.924197 , 2.171869 , 2.0423572 , - -6.173772 , 10.778437 , 25.77281 , -4.9495463 , - 14.57806 , 0.3044315 , 2.6132357 , -7.591999 , - -2.076944 , 9.025118 , 1.7834753 , -3.1799617 , - -4.9401326 , 23.465864 , 5.1685796 , -9.018578 , - 9.037825 , -4.4150195 , 6.859591 , -12.274467 , - -0.88911164, 5.186309 , -3.9988663 , -13.638606 , - -9.925445 , -0.06329413, -3.6709652 , -12.397416 , - -12.719869 , -1.395601 , 2.1150916 , 5.7381287 , - -4.4691963 , -3.82819 , -0.84233856, -1.1604277 , - -13.490127 , 8.731719 , -20.778936 , -11.495662 , - 5.8033476 , -4.752041 , 10.833007 , -6.717991 , - 4.504732 , 13.4244375 , 1.1306485 , 7.3435574 , - 1.400918 , 14.704036 , -9.501399 , 7.2315617 , - -6.417456 , 1.3333273 , 11.872697 , -0.30664724, - 8.8845 , 6.5569253 , 4.7948146 , 0.03662816, - -8.704245 , 6.224871 , -3.2701402 , -11.508579 ], - dtype=float32)} + [ -5.749211 9.505463 -8.200284 -5.2075014 5.3940268 + -3.04878 1.611095 10.127234 -10.534177 -15.821609 + 1.2032688 -0.35080156 1.2629458 -12.643498 -2.5758228 + -11.343508 2.3385992 -8.719341 14.213509 15.404744 + -0.39327756 6.338786 2.688887 8.7104025 17.469526 + -8.77959 7.0576906 4.648855 -1.3089896 -23.294737 + 8.013747 13.891729 -9.926753 5.655307 -5.9422326 + -22.842539 0.6293588 -18.46266 -10.811862 9.8192625 + 3.0070958 3.8072643 -2.3861165 3.0821571 -14.739942 + 1.7594414 -0.6485091 4.485623 2.0207152 7.264915 + -6.40137 23.63524 2.9711294 -22.708025 9.93719 + 20.354511 -10.324688 -0.700492 -8.783211 -5.27593 + 15.999649 3.3004563 12.747926 15.429879 4.7849145 + 5.6699696 -2.3826702 10.605882 3.9112158 3.1500628 + 15.859915 -2.1832209 -23.908653 -6.4799504 -4.5365124 + -9.224193 14.568347 -10.568833 4.982321 -4.342062 + 0.0914714 12.645902 -5.74285 -3.2141201 -2.7173362 + -6.680575 0.4757669 -5.035051 -6.7964664 16.865469 + -11.54324 7.681869 0.44475392 9.708182 -8.932846 + 0.4123232 -4.361452 1.3948607 9.511665 0.11667654 + 2.9079323 6.049952 9.275183 -18.078873 6.2983274 + -0.7500531 -2.725033 -7.6027865 3.3404543 2.990815 + 4.010979 11.000591 -2.8873312 7.1352735 -16.79663 + 18.495346 -14.293832 7.89578 2.2714825 22.976387 + -4.875734 -3.0836344 -2.9999814 13.751918 6.448228 + -11.924197 2.171869 2.0423572 -6.173772 10.778437 + 25.77281 -4.9495463 14.57806 0.3044315 2.6132357 + -7.591999 -2.076944 9.025118 1.7834753 -3.1799617 + -4.9401326 23.465864 5.1685796 -9.018578 9.037825 + -4.4150195 6.859591 -12.274467 -0.88911164 5.186309 + -3.9988663 -13.638606 -9.925445 -0.06329413 -3.6709652 + -12.397416 -12.719869 -1.395601 2.1150916 5.7381287 + -4.4691963 -3.82819 -0.84233856 -1.1604277 -13.490127 + 8.731719 -20.778936 -11.495662 5.8033476 -4.752041 + 10.833007 -6.717991 4.504732 13.4244375 1.1306485 + 7.3435574 1.400918 14.704036 -9.501399 7.2315617 + -6.417456 1.3333273 11.872697 -0.30664724 8.8845 + 6.5569253 4.7948146 0.03662816 -8.704245 6.224871 + -3.2701402 -11.508579 ] ``` ### 4.预训练模型 diff --git a/examples/voxceleb/README.md b/examples/voxceleb/README.md index a2e58e00..42f8903e 100644 --- a/examples/voxceleb/README.md +++ b/examples/voxceleb/README.md @@ -48,9 +48,3 @@ You can do the conversion using ffmpeg https://gist.github.com/seungwonpark/4f2 |VoxCeleb1-H(cleaned) |list_test_hard2.txt | 550894 | 275488 | 275406 | |VoxCeleb1-E | list_test_all.txt | 581480 | 290743 | 290737 | |VoxCeleb1-E(cleaned) | list_test_all2.txt |579818 |289921 |289897 | - - - - - - diff --git a/examples/voxceleb/sv0/local/data_prepare.py b/examples/voxceleb/sv0/local/data_prepare.py index 19ba41b8..03d05400 100644 --- a/examples/voxceleb/sv0/local/data_prepare.py +++ b/examples/voxceleb/sv0/local/data_prepare.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. import argparse -import os import paddle from yacs.config import CfgNode diff --git a/paddleaudio/paddleaudio/datasets/__init__.py b/paddleaudio/paddleaudio/datasets/__init__.py index 6f44e977..ebd4af98 100644 --- a/paddleaudio/paddleaudio/datasets/__init__.py +++ b/paddleaudio/paddleaudio/datasets/__init__.py @@ -13,7 +13,7 @@ # limitations under the License. from .esc50 import ESC50 from .gtzan import GTZAN +from .rirs_noises import OpenRIRNoise from .tess import TESS from .urban_sound import UrbanSound8K from .voxceleb import VoxCeleb -from .rirs_noises import OpenRIRNoise diff --git a/paddleaudio/paddleaudio/datasets/rirs_noises.py b/paddleaudio/paddleaudio/datasets/rirs_noises.py index 80bb2d74..68639a60 100644 --- a/paddleaudio/paddleaudio/datasets/rirs_noises.py +++ b/paddleaudio/paddleaudio/datasets/rirs_noises.py @@ -13,12 +13,9 @@ # limitations under the License. import collections import csv -import glob import os import random -from typing import Dict from typing import List -from typing import Tuple from paddle.io import Dataset from tqdm import tqdm @@ -26,7 +23,6 @@ from tqdm import tqdm from ..backends import load as load_audio from ..backends import save as save_wav from ..utils import DATA_HOME -from ..utils import decompress from ..utils.download import download_and_decompress from .dataset import feat_funcs diff --git a/paddleaudio/paddleaudio/datasets/voxceleb.py b/paddleaudio/paddleaudio/datasets/voxceleb.py index b9b8c271..3f72b5f2 100644 --- a/paddleaudio/paddleaudio/datasets/voxceleb.py +++ b/paddleaudio/paddleaudio/datasets/voxceleb.py @@ -17,9 +17,7 @@ import glob import os import random from multiprocessing import cpu_count -from typing import Dict from typing import List -from typing import Tuple from paddle.io import Dataset from pathos.multiprocessing import Pool @@ -135,7 +133,7 @@ class VoxCeleb(Dataset): # so, we check the vox1/wav dir status print(f"wav base path: {self.wav_path}") if not os.path.isdir(self.wav_path): - print(f"start to download the voxceleb1 dataset") + print("start to download the voxceleb1 dataset") download_and_decompress( # multi-zip parts concatenate to vox1_dev_wav.zip self.archieves_audio_dev, self.base_path, diff --git a/paddlespeech/cli/vector/infer.py b/paddlespeech/cli/vector/infer.py index 79d3b5db..175a9723 100644 --- a/paddlespeech/cli/vector/infer.py +++ b/paddlespeech/cli/vector/infer.py @@ -82,7 +82,10 @@ class VectorExecutor(BaseExecutor): choices=["spk"], help="task type in vector domain") self.parser.add_argument( - "--input", type=str, default=None, help="Audio file to extract embedding.") + "--input", + type=str, + default=None, + help="Audio file to extract embedding.") self.parser.add_argument( "--sample_rate", type=int, @@ -344,8 +347,7 @@ class VectorExecutor(BaseExecutor): Union[str, os.PathLike]: audio embedding info """ embedding = self._outputs["embedding"] - dim = embedding.shape[0] - return {"dim": dim, "embedding": embedding} + return embedding def preprocess(self, model_type: str, input_file: Union[str, os.PathLike]): """Extract the audio feat diff --git a/paddlespeech/vector/exps/ecapa_tdnn/test.py b/paddlespeech/vector/exps/ecapa_tdnn/test.py index 76832fd8..d0de6dc5 100644 --- a/paddlespeech/vector/exps/ecapa_tdnn/test.py +++ b/paddlespeech/vector/exps/ecapa_tdnn/test.py @@ -12,12 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. import argparse -import ast import os import numpy as np import paddle -import paddle.nn.functional as F from paddle.io import BatchSampler from paddle.io import DataLoader from tqdm import tqdm diff --git a/paddlespeech/vector/io/augment.py b/paddlespeech/vector/io/augment.py index 6e508c37..3baace13 100644 --- a/paddlespeech/vector/io/augment.py +++ b/paddlespeech/vector/io/augment.py @@ -14,7 +14,6 @@ # this is modified from SpeechBrain # https://github.com/speechbrain/speechbrain/blob/085be635c07f16d42cd1295045bc46c407f1e15b/speechbrain/lobes/augment.py import math -import os from typing import List import numpy as np @@ -22,7 +21,6 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddleaudio import load as load_audio from paddleaudio.datasets.rirs_noises import OpenRIRNoise from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.io.signal_processing import compute_amplitude diff --git a/paddlespeech/vector/io/signal_processing.py b/paddlespeech/vector/io/signal_processing.py index a61bf554..ee939bdb 100644 --- a/paddlespeech/vector/io/signal_processing.py +++ b/paddlespeech/vector/io/signal_processing.py @@ -11,8 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import math - import numpy as np import paddle diff --git a/tests/unit/vector/conftest.py b/tests/unit/vector/conftest.py index 7cac519b..cc5dccd1 100644 --- a/tests/unit/vector/conftest.py +++ b/tests/unit/vector/conftest.py @@ -1,3 +1,18 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + def pytest_addoption(parser): parser.addoption("--device", action="store", default="cpu") @@ -8,4 +23,3 @@ def pytest_generate_tests(metafunc): option_value = metafunc.config.option.device if "device" in metafunc.fixturenames and option_value is not None: metafunc.parametrize("device", [option_value]) - diff --git a/tests/unit/vector/test_augment.py b/tests/unit/vector/test_augment.py index 21d75bb3..5ae01da4 100644 --- a/tests/unit/vector/test_augment.py +++ b/tests/unit/vector/test_augment.py @@ -11,15 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import os - -import numpy as np import paddle -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.io import BatchSampler -from paddle.io import DataLoader -from paddle.io import Dataset def test_add_noise(tmpdir, device):