You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
PaddleSpeech/paddlespeech/t2s/exps/fastspeech2/vc2_infer.py

71 lines
2.2 KiB

import argparse
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
import numpy as np
import tqdm
from paddlespeech.cli.vector import VectorExecutor
def _process_utterance(ifpath: Path,
                       input_dir: Path,
                       output_dir: Path,
                       vec_executor):
    """Embed a single audio file and store the result as a ``.npy`` file.

    The output location mirrors the position of ``ifpath`` relative to
    ``input_dir`` underneath ``output_dir``, with the file suffix replaced
    by ``.npy``. Missing parent directories are created.

    Args:
        ifpath: Audio file to process; must be located under ``input_dir``.
        input_dir: Root directory that ``ifpath`` is made relative to.
        output_dir: Root directory under which the result is written.
        vec_executor: Callable invoked as
            ``vec_executor(audio_file=ifpath, force_yes=True)``; whatever it
            returns is handed to ``np.save``.

    Returns:
        Path of the written ``.npy`` file.

    Raises:
        ValueError: If ``ifpath`` is not located under ``input_dir``
            (raised by ``Path.relative_to``).
    """
    target = (output_dir / ifpath.relative_to(input_dir)).with_suffix(".npy")
    # Create the destination directory before running the executor.
    target.parent.mkdir(parents=True, exist_ok=True)
    np.save(target, vec_executor(audio_file=ifpath, force_yes=True))
    return target
def main(args):
    """Compute and save an embedding for every matching audio file.

    Files under ``args.input`` matching ``args.pattern`` (searched
    recursively) are passed one by one to ``_process_utterance``, which
    writes the results under ``args.output``. With ``args.num_cpu == 1`` the
    files are processed sequentially; otherwise they are dispatched to a
    thread pool with ``args.num_cpu`` workers.

    Args:
        args: Parsed command-line namespace providing ``input``, ``pattern``,
            ``output`` and ``num_cpu``.

    Raises:
        Exception: Any exception raised while processing an utterance is
            propagated, in the threaded path as well as the sequential one.
    """
    # input output preparation
    input_dir = Path(args.input).expanduser()
    ifpaths = list(input_dir.rglob(args.pattern))
    print(f"{len(ifpaths)} utterances in total")

    output_dir = Path(args.output).expanduser()
    output_dir.mkdir(parents=True, exist_ok=True)

    if not ifpaths:
        # Nothing matched the pattern: stop before constructing the executor
        # and before the warm-up, which would fail with IndexError on
        # ifpaths[0].
        return

    vec_executor = VectorExecutor()
    nprocs = args.num_cpu

    # warm up
    vec_executor(audio_file=ifpaths[0], force_yes=True)

    if nprocs == 1:
        for ifpath in tqdm.tqdm(ifpaths, total=len(ifpaths)):
            _process_utterance(
                ifpath=ifpath,
                input_dir=input_dir,
                output_dir=output_dir,
                vec_executor=vec_executor)
    else:
        with ThreadPoolExecutor(nprocs) as pool:
            with tqdm.tqdm(total=len(ifpaths)) as progress:
                futures = []
                for ifpath in ifpaths:
                    future = pool.submit(_process_utterance, ifpath, input_dir,
                                         output_dir, vec_executor)
                    future.add_done_callback(lambda p: progress.update())
                    futures.append(future)
                # An exception raised inside a worker is stored on its future
                # and only re-raised by result(); without this loop failed
                # utterances would be dropped silently.
                for future in futures:
                    future.result()
if __name__ == "__main__":
    # Command-line entry point: parse the options and hand them to main().
    parser = argparse.ArgumentParser(description="compute utterance embed.")
    # --input and --output are mandatory: main() builds Path objects from
    # them, and Path(None) fails with a TypeError when either is omitted.
    parser.add_argument(
        "--input",
        type=str,
        required=True,
        help="path of the audio_file folder.")
    parser.add_argument(
        "--pattern",
        type=str,
        default="*.wav",
        help="pattern to filter audio files.")
    parser.add_argument(
        "--output",
        metavar="OUTPUT_DIR",
        required=True,
        help="path to save spk embedding results.")
    # Used by main() as the thread-pool size when it is not 1.
    parser.add_argument(
        "--num-cpu", type=int, default=1, help="number of process.")
    args = parser.parse_args()
    main(args)