You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
71 lines
2.2 KiB
71 lines
2.2 KiB
import argparse
|
|
from concurrent.futures import ThreadPoolExecutor
|
|
from pathlib import Path
|
|
|
|
import numpy as np
|
|
import tqdm
|
|
|
|
from paddlespeech.cli.vector import VectorExecutor
|
|
|
|
|
|
def _process_utterance(ifpath: Path,
                       input_dir: Path,
                       output_dir: Path,
                       vec_executor) -> Path:
    """Compute the embedding of one audio file and save it as a ``.npy`` file.

    The output location mirrors the position of ``ifpath`` relative to
    ``input_dir`` underneath ``output_dir``, with the file suffix replaced
    by ``.npy``.

    Args:
        ifpath: Path of the audio file; must be located under ``input_dir``.
        input_dir: Root directory that ``ifpath`` is made relative to.
        output_dir: Root directory that receives the saved embedding.
        vec_executor: Callable invoked as
            ``vec_executor(audio_file=ifpath, force_yes=True)``; whatever it
            returns is passed to ``np.save``.

    Returns:
        Path of the written ``.npy`` file.

    Raises:
        ValueError: If ``ifpath`` is not located under ``input_dir``
            (raised by ``Path.relative_to``).
    """
    rel_path = ifpath.relative_to(input_dir)
    ofpath = (output_dir / rel_path).with_suffix(".npy")
    # Sub-directories of the input tree may not exist on the output side yet.
    ofpath.parent.mkdir(parents=True, exist_ok=True)
    embed = vec_executor(audio_file=ifpath, force_yes=True)
    np.save(ofpath, embed)
    return ofpath
|
|
|
|
|
|
def main(args):
    """Compute and save an embedding for every matching file under the input dir.

    Files are discovered recursively with ``args.pattern`` and each one is
    handed to ``_process_utterance``, either sequentially or through a
    thread pool.

    Args:
        args: Namespace providing ``input`` (input directory), ``pattern``
            (glob pattern for ``Path.rglob``), ``output`` (output directory)
            and ``num_cpu`` (1 selects the sequential loop, any other value
            is used as the ``ThreadPoolExecutor`` worker count).

    Raises:
        Exception: Whatever ``_process_utterance`` raises for a file; in the
            threaded branch the first failing task (in submission order) is
            re-raised after it completes.
    """
    # input output preparation
    input_dir = Path(args.input).expanduser()
    ifpaths = list(input_dir.rglob(args.pattern))
    print(f"{len(ifpaths)} utterances in total")
    output_dir = Path(args.output).expanduser()
    output_dir.mkdir(parents=True, exist_ok=True)

    # Nothing matched: stop here instead of failing on ifpaths[0] below.
    if not ifpaths:
        return

    vec_executor = VectorExecutor()
    nprocs = args.num_cpu

    # warm up
    vec_executor(audio_file=ifpaths[0], force_yes=True)

    if nprocs == 1:
        for ifpath in tqdm.tqdm(ifpaths, total=len(ifpaths)):
            _process_utterance(
                ifpath=ifpath,
                input_dir=input_dir,
                output_dir=output_dir,
                vec_executor=vec_executor)
    else:
        with ThreadPoolExecutor(nprocs) as pool:
            with tqdm.tqdm(total=len(ifpaths)) as progress:
                futures = []
                for ifpath in ifpaths:
                    future = pool.submit(_process_utterance, ifpath, input_dir,
                                         output_dir, vec_executor)
                    future.add_done_callback(lambda p: progress.update())
                    futures.append(future)
                # Wait inside the progress-bar context so the bar stays open
                # until the work is done, and call result() so that a failed
                # task raises here rather than being silently discarded.
                for future in futures:
                    future.result()
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: parse the options and hand them to main().
    parser = argparse.ArgumentParser(description="compute utterance embed.")
    # --input and --output are required: main() builds Path objects from
    # them, which fails with a TypeError when the value is None.
    parser.add_argument(
        "--input",
        type=str,
        required=True,
        help="path of the audio_file folder.")
    parser.add_argument(
        "--pattern",
        type=str,
        default="*.wav",
        help="pattern to filter audio files.")
    parser.add_argument(
        "--output",
        metavar="OUTPUT_DIR",
        required=True,
        help="path to save spk embedding results.")
    # NOTE: main() passes this value to ThreadPoolExecutor, so it is a
    # worker-thread count; 1 selects the plain sequential loop.
    parser.add_argument(
        "--num-cpu", type=int, default=1, help="number of process.")
    args = parser.parse_args()

    main(args)
|