commit
bdf876ea7b
Before Width: | Height: | Size: 84 KiB |
@ -1,13 +1,8 @@
|
||||
aiofiles
|
||||
faiss-cpu
|
||||
fastapi
|
||||
librosa
|
||||
numpy
|
||||
paddlenlp
|
||||
paddlepaddle
|
||||
paddlespeech
|
||||
praatio==5.0.0
|
||||
pydantic
|
||||
python-multipart
scikit_learn
|
||||
SoundFile
|
||||
python-multipart
|
||||
scikit_learn
|
||||
starlette
|
||||
uvicorn
|
||||
|
@ -0,0 +1,198 @@
|
||||
import os
|
||||
|
||||
from .util import get_ngpu
|
||||
from .util import MAIN_ROOT
|
||||
from .util import run_cmd
|
||||
|
||||
|
||||
class SAT:
    """ERNIE-SAT based speech synthesis, editing and cross-lingual cloning.

    Each public method assembles a shell command for
    ``paddlespeech/t2s/exps/ernie_sat/synthesize_e2e.py`` from a pretrained
    acoustic model directory plus a HiFiGAN vocoder directory, then executes
    it through ``run_cmd``.
    """

    def __init__(self):
        # Pretrained ERNIE-SAT acoustic models (zh / en / cross-lingual).
        self.zh_pretrain_model_path = os.path.realpath(
            "source/model/erniesat_aishell3_ckpt_1.2.0")
        self.en_pretrain_model_path = os.path.realpath(
            "source/model/erniesat_vctk_ckpt_1.2.0")
        self.cross_pretrain_model_path = os.path.realpath(
            "source/model/erniesat_aishell3_vctk_ckpt_1.2.0")

        # HiFiGAN vocoder checkpoint directories.
        # NOTE(review): ``eb_voc_model_path`` looks like a typo for
        # ``en_voc_model_path``; the name is kept unchanged for backward
        # compatibility and is currently unused by the methods below.
        self.zh_voc_model_path = os.path.realpath(
            "source/model/hifigan_aishell3_ckpt_0.2.0")
        self.eb_voc_model_path = os.path.realpath(
            "source/model/hifigan_vctk_ckpt_0.2.0")
        self.cross_voc_model_path = os.path.realpath(
            "source/model/hifigan_aishell3_ckpt_0.2.0")

        self.BIN_DIR = os.path.join(MAIN_ROOT,
                                    "paddlespeech/t2s/exps/ernie_sat")

    def zh_synthesize_edit(self,
                           old_str: str,
                           new_str: str,
                           input_name: os.PathLike,
                           output_name: os.PathLike,
                           task_name: str="synthesize",
                           erniesat_ckpt_name: str="snapshot_iter_289500.pdz"):
        """Chinese synthesis/editing with the aishell3 ERNIE-SAT model.

        Args:
            old_str: transcript of the input audio.
            new_str: target transcript after editing / synthesis.
            input_name: path of the source wav.
            output_name: path of the generated wav.
            task_name: either ``"synthesize"`` or ``"edit"``.
            erniesat_ckpt_name: checkpoint file inside the zh model dir.

        Returns:
            The ``run_cmd`` result, or ``None`` for an invalid ``task_name``.
        """
        if task_name not in ['synthesize', 'edit']:
            print("task name only in ['edit', 'synthesize']")
            return None

        # Inference file layout of the pretrained model directory.
        config_path = os.path.join(self.zh_pretrain_model_path, "default.yaml")
        phones_dict = os.path.join(self.zh_pretrain_model_path,
                                   "phone_id_map.txt")
        erniesat_ckpt = os.path.join(self.zh_pretrain_model_path,
                                     erniesat_ckpt_name)
        erniesat_stat = os.path.join(self.zh_pretrain_model_path,
                                     "speech_stats.npy")

        # Vocoder files.
        voc = "hifigan_aishell3"
        voc_config = os.path.join(self.zh_voc_model_path, "default.yaml")
        voc_ckpt = os.path.join(self.zh_voc_model_path,
                                "snapshot_iter_2500000.pdz")
        voc_stat = os.path.join(self.zh_voc_model_path, "feats_stats.npy")

        cmd = self.get_cmd(
            task_name=task_name,
            input_name=input_name,
            old_str=old_str,
            new_str=new_str,
            config_path=config_path,
            phones_dict=phones_dict,
            erniesat_ckpt=erniesat_ckpt,
            erniesat_stat=erniesat_stat,
            voc=voc,
            voc_config=voc_config,
            voc_ckpt=voc_ckpt,
            voc_stat=voc_stat,
            output_name=output_name,
            source_lang="zh",
            target_lang="zh")

        return run_cmd(cmd, output_name)

    def crossclone(self,
                   old_str: str,
                   new_str: str,
                   input_name: os.PathLike,
                   output_name: os.PathLike,
                   source_lang: str,
                   target_lang: str,
                   erniesat_ckpt_name: str="snapshot_iter_489000.pdz"):
        """Cross-lingual voice cloning with the aishell3+vctk model.

        ``source_lang``/``target_lang`` select the language pair; the task
        is always plain synthesis.
        """
        # Inference file layout of the cross-lingual model directory.
        config_path = os.path.join(self.cross_pretrain_model_path,
                                   "default.yaml")
        phones_dict = os.path.join(self.cross_pretrain_model_path,
                                   "phone_id_map.txt")
        erniesat_ckpt = os.path.join(self.cross_pretrain_model_path,
                                     erniesat_ckpt_name)
        erniesat_stat = os.path.join(self.cross_pretrain_model_path,
                                     "speech_stats.npy")

        # Vocoder files.
        voc = "hifigan_aishell3"
        voc_config = os.path.join(self.cross_voc_model_path, "default.yaml")
        voc_ckpt = os.path.join(self.cross_voc_model_path,
                                "snapshot_iter_2500000.pdz")
        voc_stat = os.path.join(self.cross_voc_model_path, "feats_stats.npy")

        task_name = "synthesize"
        cmd = self.get_cmd(
            task_name=task_name,
            input_name=input_name,
            old_str=old_str,
            new_str=new_str,
            config_path=config_path,
            phones_dict=phones_dict,
            erniesat_ckpt=erniesat_ckpt,
            erniesat_stat=erniesat_stat,
            voc=voc,
            voc_config=voc_config,
            voc_ckpt=voc_ckpt,
            voc_stat=voc_stat,
            output_name=output_name,
            source_lang=source_lang,
            target_lang=target_lang)

        return run_cmd(cmd, output_name)

    def en_synthesize_edit(self,
                           old_str: str,
                           new_str: str,
                           input_name: os.PathLike,
                           output_name: os.PathLike,
                           task_name: str="synthesize",
                           erniesat_ckpt_name: str="snapshot_iter_199500.pdz"):
        """English synthesis/editing with the vctk ERNIE-SAT model.

        Mirrors :meth:`zh_synthesize_edit`; see it for parameter docs.
        """
        # Validate task_name here too, matching zh_synthesize_edit (the
        # original only validated in the zh variant).
        if task_name not in ['synthesize', 'edit']:
            print("task name only in ['edit', 'synthesize']")
            return None

        # Inference file layout of the pretrained model directory.
        config_path = os.path.join(self.en_pretrain_model_path, "default.yaml")
        phones_dict = os.path.join(self.en_pretrain_model_path,
                                   "phone_id_map.txt")
        erniesat_ckpt = os.path.join(self.en_pretrain_model_path,
                                     erniesat_ckpt_name)
        erniesat_stat = os.path.join(self.en_pretrain_model_path,
                                     "speech_stats.npy")

        # Vocoder files (the aishell3 HiFiGAN is used for English as well).
        voc = "hifigan_aishell3"
        voc_config = os.path.join(self.zh_voc_model_path, "default.yaml")
        voc_ckpt = os.path.join(self.zh_voc_model_path,
                                "snapshot_iter_2500000.pdz")
        voc_stat = os.path.join(self.zh_voc_model_path, "feats_stats.npy")

        cmd = self.get_cmd(
            task_name=task_name,
            input_name=input_name,
            old_str=old_str,
            new_str=new_str,
            config_path=config_path,
            phones_dict=phones_dict,
            erniesat_ckpt=erniesat_ckpt,
            erniesat_stat=erniesat_stat,
            voc=voc,
            voc_config=voc_config,
            voc_ckpt=voc_ckpt,
            voc_stat=voc_stat,
            output_name=output_name,
            source_lang="en",
            target_lang="en")

        return run_cmd(cmd, output_name)

    def get_cmd(self,
                task_name: str,
                input_name: str,
                old_str: str,
                new_str: str,
                config_path: str,
                phones_dict: str,
                erniesat_ckpt: str,
                erniesat_stat: str,
                voc: str,
                voc_config: str,
                voc_ckpt: str,
                voc_stat: str,
                output_name: str,
                source_lang: str,
                target_lang: str):
        """Build the synthesize_e2e.py command line for the given files."""
        ngpu = get_ngpu()
        cmd = f"""
            FLAGS_allocator_strategy=naive_best_fit \
            FLAGS_fraction_of_gpu_memory_to_use=0.01 \
            python3 {self.BIN_DIR}/synthesize_e2e.py \
                --task_name={task_name} \
                --wav_path={input_name} \
                --old_str='{old_str}' \
                --new_str='{new_str}' \
                --source_lang={source_lang} \
                --target_lang={target_lang} \
                --erniesat_config={config_path} \
                --phones_dict={phones_dict} \
                --erniesat_ckpt={erniesat_ckpt} \
                --erniesat_stat={erniesat_stat} \
                --voc={voc} \
                --voc_config={voc_config} \
                --voc_ckpt={voc_ckpt} \
                --voc_stat={voc_stat} \
                --output_name={output_name} \
                --ngpu={ngpu}
        """

        return cmd
|
@ -0,0 +1,127 @@
|
||||
import os
|
||||
|
||||
from .util import get_ngpu
|
||||
from .util import MAIN_ROOT
|
||||
from .util import run_cmd
|
||||
|
||||
|
||||
def find_max_ckpt(model_path):
    """Return the largest iteration number among ``*_<iter>.pdz`` checkpoint
    files in *model_path* (e.g. ``snapshot_iter_96400.pdz``), or 0 if none.

    The original implementation did ``a1, a2, it = stem.split("_")`` which
    raised ``ValueError`` for any ``.pdz`` file whose name did not contain
    exactly two underscores, and crashed on a non-numeric tail; such files
    are now simply ignored.
    """
    max_ckpt = 0
    for filename in os.listdir(model_path):
        if not filename.endswith('.pdz'):
            continue
        stem = filename[:-len('.pdz')]
        # Take the trailing "_<number>" part; skip names that don't match.
        _, _, iteration = stem.rpartition("_")
        if iteration.isdigit():
            max_ckpt = max(max_ckpt, int(iteration))
    return max_ckpt
||||
|
||||
|
||||
class FineTune:
    """Fine-tune a FastSpeech2 (aishell3) TTS model on user data.

    Follows the pipeline of ``examples/other/tts_finetune/tts3/run.sh``:
    OOV check -> MFA alignment -> durations -> feature extraction ->
    environment preparation -> fine-tuning, then synthesis with the
    fine-tuned checkpoint.
    """

    def __init__(self):
        self.now_file_path = os.path.dirname(__file__)
        self.PYTHONPATH = os.path.join(MAIN_ROOT,
                                       "examples/other/tts_finetune/tts3")
        self.BIN_DIR = os.path.join(MAIN_ROOT,
                                    "paddlespeech/t2s/exps/fastspeech2")
        self.pretrained_model_dir = os.path.realpath(
            "source/model/fastspeech2_aishell3_ckpt_1.1.0")
        self.voc_model_dir = os.path.realpath(
            "source/model/hifigan_aishell3_ckpt_0.2.0")
        # Single-component join kept simple: it is just a relative path.
        self.finetune_config = "conf/tts3_finetune.yaml"

    def finetune(self, input_dir, exp_dir='temp', epoch=100):
        """Run the fine-tuning pipeline on the wavs/labels in *input_dir*.

        Args:
            input_dir: directory with the user's recordings and labels.
            exp_dir: experiment directory for intermediate artifacts.
            epoch: number of fine-tuning epochs.  BUGFIX: the original
                command hard-coded ``--epoch=100`` and silently ignored
                this parameter; it is now passed through.

        Returns:
            The ``run_cmd`` result for the assembled pipeline.
        """
        newdir_name = "newdir"
        new_dir = os.path.join(input_dir, newdir_name)
        mfa_dir = os.path.join(exp_dir, 'mfa_result')
        dump_dir = os.path.join(exp_dir, 'dump')
        output_dir = os.path.join(exp_dir, 'exp')
        lang = "zh"
        ngpu = get_ngpu()

        cmd = f"""
            # check oov
            python3 {self.PYTHONPATH}/local/check_oov.py \
                --input_dir={input_dir} \
                --pretrained_model_dir={self.pretrained_model_dir} \
                --newdir_name={newdir_name} \
                --lang={lang}

            # get mfa result
            python3 {self.PYTHONPATH}/local/get_mfa_result.py \
                --input_dir={new_dir} \
                --mfa_dir={mfa_dir} \
                --lang={lang}

            # generate durations.txt
            python3 {self.PYTHONPATH}/local/generate_duration.py \
                --mfa_dir={mfa_dir}

            # extract feature
            python3 {self.PYTHONPATH}/local/extract_feature.py \
                --duration_file="./durations.txt" \
                --input_dir={new_dir} \
                --dump_dir={dump_dir} \
                --pretrained_model_dir={self.pretrained_model_dir}

            # create finetune env
            python3 {self.PYTHONPATH}/local/prepare_env.py \
                --pretrained_model_dir={self.pretrained_model_dir} \
                --output_dir={output_dir}

            # finetune
            python3 {self.PYTHONPATH}/local/finetune.py \
                --pretrained_model_dir={self.pretrained_model_dir} \
                --dump_dir={dump_dir} \
                --output_dir={output_dir} \
                --ngpu={ngpu} \
                --epoch={epoch} \
                --finetune_config={self.finetune_config}
        """

        print(cmd)

        return run_cmd(cmd, exp_dir)

    def synthesize(self, text, wav_name, out_wav_dir, exp_dir='temp'):
        """Synthesize *text* with the newest fine-tuned checkpoint.

        Args:
            text: sentence to synthesize.
            wav_name: utterance id; also the output wav's base name.
            out_wav_dir: directory for the generated wav.
            exp_dir: experiment directory used by :meth:`finetune`.
        """
        voc = "hifigan_aishell3"
        dump_dir = os.path.join(exp_dir, 'dump')
        output_dir = os.path.join(exp_dir, 'exp')
        text_path = os.path.join(exp_dir, 'sentences.txt')
        lang = "zh"
        ngpu = get_ngpu()

        # Pick the checkpoint with the highest iteration count.
        model_path = f"{output_dir}/checkpoints"
        ckpt = find_max_ckpt(model_path)

        # Write the sentence file ("<utt_id> <text>" per line).
        with open(text_path, "w", encoding='utf8') as f:
            f.write(wav_name + " " + text)

        cmd = f"""
            FLAGS_allocator_strategy=naive_best_fit \
            FLAGS_fraction_of_gpu_memory_to_use=0.01 \
            python3 {self.BIN_DIR}/../synthesize_e2e.py \
                --am=fastspeech2_aishell3 \
                --am_config={self.pretrained_model_dir}/default.yaml \
                --am_ckpt={output_dir}/checkpoints/snapshot_iter_{ckpt}.pdz \
                --am_stat={self.pretrained_model_dir}/speech_stats.npy \
                --voc={voc} \
                --voc_config={self.voc_model_dir}/default.yaml \
                --voc_ckpt={self.voc_model_dir}/snapshot_iter_2500000.pdz \
                --voc_stat={self.voc_model_dir}/feats_stats.npy \
                --lang={lang} \
                --text={text_path} \
                --output_dir={out_wav_dir} \
                --phones_dict={dump_dir}/phone_id_map.txt \
                --speaker_dict={dump_dir}/speaker_id_map.txt \
                --spk_id=0 \
                --ngpu={ngpu}
        """

        out_path = os.path.join(out_wav_dir, f"{wav_name}.wav")

        return run_cmd(cmd, out_path)
|
@ -0,0 +1,60 @@
|
||||
import os
|
||||
import shutil
|
||||
|
||||
from .util import get_ngpu
|
||||
from .util import MAIN_ROOT
|
||||
from .util import run_cmd
|
||||
|
||||
|
||||
class VoiceCloneGE2E():
    """Voice cloning with a GE2E speaker encoder.

    Uses FastSpeech2 (aishell3, vc1 checkpoint) as acoustic model and
    PWGAN (aishell3) as vocoder; the GE2E encoder extracts the speaker
    embedding from a reference wav.
    """

    def __init__(self):
        # Path to the t2s experiment scripts.
        self.BIN_DIR = os.path.join(MAIN_ROOT, "paddlespeech/t2s/exps")
        # Acoustic model (am).
        self.am = "fastspeech2_aishell3"
        self.am_config = "source/model/fastspeech2_nosil_aishell3_vc1_ckpt_0.5/default.yaml"
        self.am_ckpt = "source/model/fastspeech2_nosil_aishell3_vc1_ckpt_0.5/snapshot_iter_96400.pdz"
        self.am_stat = "source/model/fastspeech2_nosil_aishell3_vc1_ckpt_0.5/speech_stats.npy"
        self.phones_dict = "source/model/fastspeech2_nosil_aishell3_vc1_ckpt_0.5/phone_id_map.txt"
        # Vocoder (voc).
        self.voc = "pwgan_aishell3"
        self.voc_config = "source/model/pwg_aishell3_ckpt_0.5/default.yaml"
        self.voc_ckpt = "source/model/pwg_aishell3_ckpt_0.5/snapshot_iter_1000000.pdz"
        self.voc_stat = "source/model/pwg_aishell3_ckpt_0.5/feats_stats.npy"
        # GE2E speaker encoder.
        self.ge2e_params_path = "source/model/ge2e_ckpt_0.3/step-3000000.pdparams"

    def vc(self, text, input_wav, out_wav):
        """Synthesize *text* in the voice of *input_wav*.

        Args:
            text: sentence to synthesize.
            input_wav: reference wav whose voice is cloned.
            out_wav: desired output wav path (its directory becomes the
                script's ``--output-dir``).
        """
        # voice_cloning.py expects a *directory* of reference audio, so the
        # single input wav is copied into a scratch directory of its own.
        _, full_file_name = os.path.split(input_wav)
        ref_audio_dir = os.path.realpath("tmp_dir/ge2e")
        if os.path.exists(ref_audio_dir):
            shutil.rmtree(ref_audio_dir)

        os.makedirs(ref_audio_dir, exist_ok=True)
        shutil.copy(input_wav, ref_audio_dir)

        # BUGFIX: dirname of a bare filename is "" which would produce an
        # empty --output-dir; fall back to the current directory.
        output_dir = os.path.dirname(out_wav) or "."
        ngpu = get_ngpu()

        cmd = f"""
            python3 {self.BIN_DIR}/voice_cloning.py \
                --am={self.am} \
                --am_config={self.am_config} \
                --am_ckpt={self.am_ckpt} \
                --am_stat={self.am_stat} \
                --voc={self.voc} \
                --voc_config={self.voc_config} \
                --voc_ckpt={self.voc_ckpt} \
                --voc_stat={self.voc_stat} \
                --ge2e_params_path={self.ge2e_params_path} \
                --text="{text}" \
                --input-dir={ref_audio_dir} \
                --output-dir={output_dir} \
                --phones-dict={self.phones_dict} \
                --ngpu={ngpu}
        """

        output_name = os.path.join(output_dir, full_file_name)
        return run_cmd(cmd, output_name=output_name)
|
@ -0,0 +1,56 @@
|
||||
import os
|
||||
import shutil
|
||||
|
||||
from .util import get_ngpu
|
||||
from .util import MAIN_ROOT
|
||||
from .util import run_cmd
|
||||
|
||||
|
||||
class VoiceCloneTDNN():
    """Voice cloning with an ECAPA-TDNN speaker encoder.

    Uses FastSpeech2 (aishell3, vc2 checkpoint) as acoustic model and
    PWGAN (aishell3) as vocoder; ``--use_ecapa=True`` selects the TDNN
    speaker embedding inside ``voice_cloning.py``.
    """

    def __init__(self):
        # Path to the t2s experiment scripts.
        self.BIN_DIR = os.path.join(MAIN_ROOT, "paddlespeech/t2s/exps")

        # Acoustic model (am).
        self.am = "fastspeech2_aishell3"
        self.am_config = "source/model/fastspeech2_aishell3_ckpt_vc2_1.2.0/default.yaml"
        self.am_ckpt = "source/model/fastspeech2_aishell3_ckpt_vc2_1.2.0/snapshot_iter_96400.pdz"
        self.am_stat = "source/model/fastspeech2_aishell3_ckpt_vc2_1.2.0/speech_stats.npy"
        self.phones_dict = "source/model/fastspeech2_aishell3_ckpt_vc2_1.2.0/phone_id_map.txt"
        # Vocoder (voc).
        self.voc = "pwgan_aishell3"
        self.voc_config = "source/model/pwg_aishell3_ckpt_0.5/default.yaml"
        self.voc_ckpt = "source/model/pwg_aishell3_ckpt_0.5/snapshot_iter_1000000.pdz"
        self.voc_stat = "source/model/pwg_aishell3_ckpt_0.5/feats_stats.npy"

    def vc(self, text, input_wav, out_wav):
        """Synthesize *text* in the voice of *input_wav*.

        Args:
            text: sentence to synthesize.
            input_wav: reference wav whose voice is cloned.
            out_wav: desired output wav path (its directory becomes the
                script's ``--output-dir``).
        """
        # voice_cloning.py expects a *directory* of reference audio, so the
        # single input wav is copied into a scratch directory of its own.
        _, full_file_name = os.path.split(input_wav)
        ref_audio_dir = os.path.realpath("tmp_dir/tdnn")
        if os.path.exists(ref_audio_dir):
            shutil.rmtree(ref_audio_dir)
        os.makedirs(ref_audio_dir, exist_ok=True)
        shutil.copy(input_wav, ref_audio_dir)

        # BUGFIX: dirname of a bare filename is "" which would produce an
        # empty --output-dir; fall back to the current directory.
        output_dir = os.path.dirname(out_wav) or "."
        ngpu = get_ngpu()

        cmd = f"""
            python3 {self.BIN_DIR}/voice_cloning.py \
                --am={self.am} \
                --am_config={self.am_config} \
                --am_ckpt={self.am_ckpt} \
                --am_stat={self.am_stat} \
                --voc={self.voc} \
                --voc_config={self.voc_config} \
                --voc_ckpt={self.voc_ckpt} \
                --voc_stat={self.voc_stat} \
                --text="{text}" \
                --input-dir={ref_audio_dir} \
                --output-dir={output_dir} \
                --phones-dict={self.phones_dict} \
                --use_ecapa=True \
                --ngpu={ngpu}
        """

        output_name = os.path.join(output_dir, full_file_name)
        return run_cmd(cmd, output_name=output_name)
|
@ -0,0 +1,88 @@
|
||||
import axios from 'axios'
|
||||
import {apiURL} from "./API.js"
|
||||
|
||||
// Thin async wrappers around the voice-cloning HTTP API.
// Each function forwards to the endpoint in apiURL and resolves with the
// axios response object.

// Upload audio for voice cloning (VC).
export async function vcUpload(params) {
    return await axios.post(apiURL.VC_Upload, params);
}

// Upload audio for ERNIE-SAT editing.
export async function satUpload(params) {
    return await axios.post(apiURL.SAT_Upload, params);
}

// Upload audio for fine-tuning.
export async function fineTuneUpload(params) {
    return await axios.post(apiURL.FineTune_Upload, params);
}

// Delete an uploaded audio file.
export async function vcDel(params) {
    return await axios.post(apiURL.VC_Del, params);
}

// List uploaded VC audio files.
export async function vcList() {
    return await axios.get(apiURL.VC_List);
}

// List uploaded SAT audio files.
export async function satList() {
    return await axios.get(apiURL.SAT_List);
}

// List uploaded fine-tune audio files.
export async function fineTuneList(params) {
    return await axios.post(apiURL.FineTune_List, params);
}

// Fine-tune one-click reset: obtain a fresh working directory.
export async function fineTuneNewDir() {
    return await axios.get(apiURL.FineTune_NewDir);
}

// Download audio data.
export async function vcDownload(params) {
    return await axios.post(apiURL.VC_Download, params);
}

// Download audio data as Base64.
export async function vcDownloadBase64(params) {
    return await axios.post(apiURL.VC_Download_Base64, params);
}

// Clone synthesis via G2P.
export async function vcCloneG2P(params) {
    return await axios.post(apiURL.VC_CloneG2p, params);
}

// Clone synthesis via ERNIE-SAT.
export async function vcCloneSAT(params) {
    return await axios.post(apiURL.VC_CloneSAT, params);
}

// Clone synthesis — run fine-tuning.
export async function vcCloneFineTune(params) {
    return await axios.post(apiURL.VC_CloneFineTune, params);
}

// Clone synthesis — synthesize with the fine-tuned model.
export async function vcCloneFineTuneSyn(params) {
    return await axios.post(apiURL.VC_CloneFineTuneSyn, params);
}
@ -1,7 +0,0 @@
|
||||
paddlespeech.cls.exps.panns.deploy.predict module
|
||||
=================================================
|
||||
|
||||
.. automodule:: paddlespeech.cls.exps.panns.deploy.predict
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,7 +0,0 @@
|
||||
paddlespeech.cls.exps.panns.export\_model module
|
||||
================================================
|
||||
|
||||
.. automodule:: paddlespeech.cls.exps.panns.export_model
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,7 +0,0 @@
|
||||
paddlespeech.cls.exps.panns.predict module
|
||||
==========================================
|
||||
|
||||
.. automodule:: paddlespeech.cls.exps.panns.predict
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,7 +0,0 @@
|
||||
paddlespeech.cls.exps.panns.train module
|
||||
========================================
|
||||
|
||||
.. automodule:: paddlespeech.cls.exps.panns.train
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,7 +0,0 @@
|
||||
paddlespeech.kws.exps.mdtc.plot\_det\_curve module
|
||||
==================================================
|
||||
|
||||
.. automodule:: paddlespeech.kws.exps.mdtc.plot_det_curve
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,7 +0,0 @@
|
||||
paddlespeech.s2t.decoders.ctcdecoder.scorer\_deprecated module
|
||||
==============================================================
|
||||
|
||||
.. automodule:: paddlespeech.s2t.decoders.ctcdecoder.scorer_deprecated
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,7 +0,0 @@
|
||||
paddlespeech.s2t.decoders.recog\_bin module
|
||||
===========================================
|
||||
|
||||
.. automodule:: paddlespeech.s2t.decoders.recog_bin
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,7 +0,0 @@
|
||||
paddlespeech.s2t.decoders.scorers.ngram module
|
||||
==============================================
|
||||
|
||||
.. automodule:: paddlespeech.s2t.decoders.scorers.ngram
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,7 +0,0 @@
|
||||
paddlespeech.s2t.exps.deepspeech2.bin.deploy.client module
|
||||
==========================================================
|
||||
|
||||
.. automodule:: paddlespeech.s2t.exps.deepspeech2.bin.deploy.client
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,7 +0,0 @@
|
||||
paddlespeech.s2t.exps.deepspeech2.bin.deploy.record module
|
||||
==========================================================
|
||||
|
||||
.. automodule:: paddlespeech.s2t.exps.deepspeech2.bin.deploy.record
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,7 +0,0 @@
|
||||
paddlespeech.s2t.exps.deepspeech2.bin.deploy.send module
|
||||
========================================================
|
||||
|
||||
.. automodule:: paddlespeech.s2t.exps.deepspeech2.bin.deploy.send
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,7 +0,0 @@
|
||||
paddlespeech.s2t.exps.u2.trainer module
|
||||
=======================================
|
||||
|
||||
.. automodule:: paddlespeech.s2t.exps.u2.trainer
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,7 +0,0 @@
|
||||
paddlespeech.s2t.exps.u2\_kaldi.bin.recog module
|
||||
================================================
|
||||
|
||||
.. automodule:: paddlespeech.s2t.exps.u2_kaldi.bin.recog
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,7 +0,0 @@
|
||||
paddlespeech.s2t.training.extensions.snapshot module
|
||||
====================================================
|
||||
|
||||
.. automodule:: paddlespeech.s2t.training.extensions.snapshot
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,7 +0,0 @@
|
||||
paddlespeech.s2t.training.extensions.visualizer module
|
||||
======================================================
|
||||
|
||||
.. automodule:: paddlespeech.s2t.training.extensions.visualizer
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,7 +0,0 @@
|
||||
paddlespeech.s2t.training.updaters.trainer module
|
||||
=================================================
|
||||
|
||||
.. automodule:: paddlespeech.s2t.training.updaters.trainer
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,7 +0,0 @@
|
||||
paddlespeech.s2t.transform.add\_deltas module
|
||||
=============================================
|
||||
|
||||
.. automodule:: paddlespeech.s2t.transform.add_deltas
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,7 +0,0 @@
|
||||
paddlespeech.s2t.transform.channel\_selector module
|
||||
===================================================
|
||||
|
||||
.. automodule:: paddlespeech.s2t.transform.channel_selector
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,7 +0,0 @@
|
||||
paddlespeech.s2t.transform.cmvn module
|
||||
======================================
|
||||
|
||||
.. automodule:: paddlespeech.s2t.transform.cmvn
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,7 +0,0 @@
|
||||
paddlespeech.s2t.transform.functional module
|
||||
============================================
|
||||
|
||||
.. automodule:: paddlespeech.s2t.transform.functional
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,7 +0,0 @@
|
||||
paddlespeech.s2t.transform.perturb module
|
||||
=========================================
|
||||
|
||||
.. automodule:: paddlespeech.s2t.transform.perturb
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,24 +0,0 @@
|
||||
paddlespeech.s2t.transform package
|
||||
==================================
|
||||
|
||||
.. automodule:: paddlespeech.s2t.transform
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 4
|
||||
|
||||
paddlespeech.s2t.transform.add_deltas
|
||||
paddlespeech.s2t.transform.channel_selector
|
||||
paddlespeech.s2t.transform.cmvn
|
||||
paddlespeech.s2t.transform.functional
|
||||
paddlespeech.s2t.transform.perturb
|
||||
paddlespeech.s2t.transform.spec_augment
|
||||
paddlespeech.s2t.transform.spectrogram
|
||||
paddlespeech.s2t.transform.transform_interface
|
||||
paddlespeech.s2t.transform.transformation
|
||||
paddlespeech.s2t.transform.wpe
|
@ -1,7 +0,0 @@
|
||||
paddlespeech.s2t.transform.spec\_augment module
|
||||
===============================================
|
||||
|
||||
.. automodule:: paddlespeech.s2t.transform.spec_augment
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,7 +0,0 @@
|
||||
paddlespeech.s2t.transform.spectrogram module
|
||||
=============================================
|
||||
|
||||
.. automodule:: paddlespeech.s2t.transform.spectrogram
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,7 +0,0 @@
|
||||
paddlespeech.s2t.transform.transform\_interface module
|
||||
======================================================
|
||||
|
||||
.. automodule:: paddlespeech.s2t.transform.transform_interface
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,7 +0,0 @@
|
||||
paddlespeech.s2t.transform.transformation module
|
||||
================================================
|
||||
|
||||
.. automodule:: paddlespeech.s2t.transform.transformation
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,7 +0,0 @@
|
||||
paddlespeech.s2t.transform.wpe module
|
||||
=====================================
|
||||
|
||||
.. automodule:: paddlespeech.s2t.transform.wpe
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,7 +0,0 @@
|
||||
paddlespeech.server.engine.acs.python.acs\_engine module
|
||||
========================================================
|
||||
|
||||
.. automodule:: paddlespeech.server.engine.acs.python.acs_engine
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,7 +0,0 @@
|
||||
paddlespeech.server.utils.log module
|
||||
====================================
|
||||
|
||||
.. automodule:: paddlespeech.server.utils.log
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,7 +0,0 @@
|
||||
paddlespeech.t2s.exps.stream\_play\_tts module
|
||||
==============================================
|
||||
|
||||
.. automodule:: paddlespeech.t2s.exps.stream_play_tts
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,7 +0,0 @@
|
||||
paddlespeech.t2s.models.ernie\_sat.mlm module
|
||||
=============================================
|
||||
|
||||
.. automodule:: paddlespeech.t2s.models.ernie_sat.mlm
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,7 +0,0 @@
|
||||
paddlespeech.t2s.models.vits.monotonic\_align.core module
|
||||
=========================================================
|
||||
|
||||
.. automodule:: paddlespeech.t2s.models.vits.monotonic_align.core
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,16 +0,0 @@
|
||||
paddlespeech.t2s.models.vits.monotonic\_align package
|
||||
=====================================================
|
||||
|
||||
.. automodule:: paddlespeech.t2s.models.vits.monotonic_align
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 4
|
||||
|
||||
paddlespeech.t2s.models.vits.monotonic_align.core
|
||||
paddlespeech.t2s.models.vits.monotonic_align.setup
|
@ -1,7 +0,0 @@
|
||||
paddlespeech.t2s.models.vits.monotonic\_align.setup module
|
||||
==========================================================
|
||||
|
||||
.. automodule:: paddlespeech.t2s.models.vits.monotonic_align.setup
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,44 @@
|
||||
###########################################################
|
||||
# DATA SETTING #
|
||||
###########################################################
|
||||
dataset_type: Ernie
|
||||
train_path: data/iwslt2012_zh/train.txt
|
||||
dev_path: data/iwslt2012_zh/dev.txt
|
||||
test_path: data/iwslt2012_zh/test.txt
|
||||
batch_size: 64
|
||||
num_workers: 2
|
||||
data_params:
|
||||
pretrained_token: ernie-3.0-base-zh
|
||||
punc_path: data/iwslt2012_zh/punc_vocab
|
||||
seq_len: 100
|
||||
|
||||
|
||||
###########################################################
|
||||
# MODEL SETTING #
|
||||
###########################################################
|
||||
model_type: ErnieLinear
|
||||
model:
|
||||
pretrained_token: ernie-3.0-base-zh
|
||||
num_classes: 4
|
||||
|
||||
###########################################################
|
||||
# OPTIMIZER SETTING #
|
||||
###########################################################
|
||||
optimizer_params:
|
||||
weight_decay: 1.0e-6 # weight decay coefficient.
|
||||
|
||||
scheduler_params:
|
||||
learning_rate: 1.0e-5 # learning rate.
|
||||
gamma: 0.9999 # scheduler gamma must between(0.0, 1.0) and closer to 1.0 is better.
|
||||
|
||||
###########################################################
|
||||
# TRAINING SETTING #
|
||||
###########################################################
|
||||
max_epoch: 20
|
||||
num_snapshots: 5
|
||||
|
||||
###########################################################
|
||||
# OTHER SETTING #
|
||||
###########################################################
|
||||
# NOTE(review): duplicate key — "num_snapshots" is also set to 5 under
# TRAINING SETTING above; most YAML loaders let this later value (10) win.
# Confirm which value is intended and remove the duplicate.
num_snapshots: 10 # max number of snapshots to keep while training
|
||||
seed: 42 # random seed for paddle, random, and np.random
|
@ -0,0 +1,44 @@
|
||||
###########################################################
|
||||
# DATA SETTING #
|
||||
###########################################################
|
||||
dataset_type: Ernie
|
||||
train_path: data/iwslt2012_zh/train.txt
|
||||
dev_path: data/iwslt2012_zh/dev.txt
|
||||
test_path: data/iwslt2012_zh/test.txt
|
||||
batch_size: 64
|
||||
num_workers: 2
|
||||
data_params:
|
||||
pretrained_token: ernie-3.0-medium-zh
|
||||
punc_path: data/iwslt2012_zh/punc_vocab
|
||||
seq_len: 100
|
||||
|
||||
|
||||
###########################################################
|
||||
# MODEL SETTING #
|
||||
###########################################################
|
||||
model_type: ErnieLinear
|
||||
model:
|
||||
pretrained_token: ernie-3.0-medium-zh
|
||||
num_classes: 4
|
||||
|
||||
###########################################################
|
||||
# OPTIMIZER SETTING #
|
||||
###########################################################
|
||||
optimizer_params:
|
||||
weight_decay: 1.0e-6 # weight decay coefficient.
|
||||
|
||||
scheduler_params:
|
||||
learning_rate: 1.0e-5 # learning rate.
|
||||
gamma: 0.9999 # scheduler gamma must between(0.0, 1.0) and closer to 1.0 is better.
|
||||
|
||||
###########################################################
|
||||
# TRAINING SETTING #
|
||||
###########################################################
|
||||
max_epoch: 20
|
||||
num_snapshots: 5
|
||||
|
||||
###########################################################
|
||||
# OTHER SETTING #
|
||||
###########################################################
|
||||
# NOTE(review): duplicate key — "num_snapshots" is also set to 5 under
# TRAINING SETTING above; most YAML loaders let this later value (10) win.
# Confirm which value is intended and remove the duplicate.
num_snapshots: 10 # max number of snapshots to keep while training
|
||||
seed: 42 # random seed for paddle, random, and np.random
|
@ -0,0 +1,44 @@
|
||||
###########################################################
|
||||
# DATA SETTING #
|
||||
###########################################################
|
||||
dataset_type: Ernie
|
||||
train_path: data/iwslt2012_zh/train.txt
|
||||
dev_path: data/iwslt2012_zh/dev.txt
|
||||
test_path: data/iwslt2012_zh/test.txt
|
||||
batch_size: 64
|
||||
num_workers: 2
|
||||
data_params:
|
||||
pretrained_token: ernie-3.0-mini-zh
|
||||
punc_path: data/iwslt2012_zh/punc_vocab
|
||||
seq_len: 100
|
||||
|
||||
|
||||
###########################################################
|
||||
# MODEL SETTING #
|
||||
###########################################################
|
||||
model_type: ErnieLinear
|
||||
model:
|
||||
pretrained_token: ernie-3.0-mini-zh
|
||||
num_classes: 4
|
||||
|
||||
###########################################################
|
||||
# OPTIMIZER SETTING #
|
||||
###########################################################
|
||||
optimizer_params:
|
||||
weight_decay: 1.0e-6 # weight decay coefficient.
|
||||
|
||||
scheduler_params:
|
||||
learning_rate: 1.0e-5 # learning rate.
|
||||
gamma: 0.9999 # scheduler gamma must between(0.0, 1.0) and closer to 1.0 is better.
|
||||
|
||||
###########################################################
|
||||
# TRAINING SETTING #
|
||||
###########################################################
|
||||
max_epoch: 20
|
||||
num_snapshots: 5
|
||||
|
||||
###########################################################
|
||||
# OTHER SETTING #
|
||||
###########################################################
|
||||
num_snapshots: 10 # max number of snapshots to keep while training
|
||||
seed: 42 # random seed for paddle, random, and np.random
|
@ -0,0 +1,44 @@
|
||||
###########################################################
|
||||
# DATA SETTING #
|
||||
###########################################################
|
||||
dataset_type: Ernie
|
||||
train_path: data/iwslt2012_zh/train.txt
|
||||
dev_path: data/iwslt2012_zh/dev.txt
|
||||
test_path: data/iwslt2012_zh/test.txt
|
||||
batch_size: 64
|
||||
num_workers: 2
|
||||
data_params:
|
||||
pretrained_token: ernie-3.0-nano-zh
|
||||
punc_path: data/iwslt2012_zh/punc_vocab
|
||||
seq_len: 100
|
||||
|
||||
|
||||
###########################################################
|
||||
# MODEL SETTING #
|
||||
###########################################################
|
||||
model_type: ErnieLinear
|
||||
model:
|
||||
pretrained_token: ernie-3.0-nano-zh
|
||||
num_classes: 4
|
||||
|
||||
###########################################################
|
||||
# OPTIMIZER SETTING #
|
||||
###########################################################
|
||||
optimizer_params:
|
||||
weight_decay: 1.0e-6 # weight decay coefficient.
|
||||
|
||||
scheduler_params:
|
||||
learning_rate: 1.0e-5 # learning rate.
|
||||
gamma: 0.9999 # scheduler gamma must between(0.0, 1.0) and closer to 1.0 is better.
|
||||
|
||||
###########################################################
|
||||
# TRAINING SETTING #
|
||||
###########################################################
|
||||
max_epoch: 20
|
||||
num_snapshots: 5
|
||||
|
||||
###########################################################
|
||||
# OTHER SETTING #
|
||||
###########################################################
|
||||
num_snapshots: 10 # max number of snapshots to keep while training
|
||||
seed: 42 # random seed for paddle, random, and np.random
|
@ -0,0 +1,44 @@
|
||||
###########################################################
|
||||
# DATA SETTING #
|
||||
###########################################################
|
||||
dataset_type: Ernie
|
||||
train_path: data/iwslt2012_zh/train.txt
|
||||
dev_path: data/iwslt2012_zh/dev.txt
|
||||
test_path: data/iwslt2012_zh/test.txt
|
||||
batch_size: 64
|
||||
num_workers: 2
|
||||
data_params:
|
||||
pretrained_token: ernie-tiny
|
||||
punc_path: data/iwslt2012_zh/punc_vocab
|
||||
seq_len: 100
|
||||
|
||||
|
||||
###########################################################
|
||||
# MODEL SETTING #
|
||||
###########################################################
|
||||
model_type: ErnieLinear
|
||||
model:
|
||||
pretrained_token: ernie-tiny
|
||||
num_classes: 4
|
||||
|
||||
###########################################################
|
||||
# OPTIMIZER SETTING #
|
||||
###########################################################
|
||||
optimizer_params:
|
||||
weight_decay: 1.0e-6 # weight decay coefficient.
|
||||
|
||||
scheduler_params:
|
||||
learning_rate: 1.0e-5 # learning rate.
|
||||
gamma: 0.9999 # scheduler gamma must between(0.0, 1.0) and closer to 1.0 is better.
|
||||
|
||||
###########################################################
|
||||
# TRAINING SETTING #
|
||||
###########################################################
|
||||
max_epoch: 20
|
||||
num_snapshots: 5
|
||||
|
||||
###########################################################
|
||||
# OTHER SETTING #
|
||||
###########################################################
|
||||
num_snapshots: 10 # max number of snapshots to keep while training
|
||||
seed: 42 # random seed for paddle, random, and np.random
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue