diff --git a/docs/tutorial/tts/tts_tutorial.ipynb b/docs/tutorial/tts/tts_tutorial.ipynb index 587924cd..2bb407be 100644 --- a/docs/tutorial/tts/tts_tutorial.ipynb +++ b/docs/tutorial/tts/tts_tutorial.ipynb @@ -252,7 +252,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 25, "metadata": { "scrolled": true }, @@ -261,8 +261,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "The autoreload extension is already loaded. To reload it, use:\n", - " %reload_ext autoreload\n" + "env: CUDA_VISIBLE_DEVICES=0\n" ] } ], @@ -284,7 +283,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 28, "metadata": { "scrolled": true }, @@ -317,7 +316,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 30, "metadata": { "scrolled": true }, @@ -596,11 +595,19 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 31, "metadata": { "scrolled": true }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Frontend done!\n" + ] + } + ], "source": [ "# 传入 phones_dict 会把相应的 phones 转换成 phone_ids\n", "frontend = Frontend(phone_vocab_path=phones_dict)\n", @@ -619,25 +626,11 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 35, "metadata": { "scrolled": true }, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Building prefix dict from the default dictionary ...\n", - "DEBUG:jieba:Building prefix dict from the default dictionary ...\n", - "Loading model from cache /tmp/jieba.cache\n", - "DEBUG:jieba:Loading model from cache /tmp/jieba.cache\n", - "Loading model cost 5.331 seconds.\n", - "DEBUG:jieba:Loading model cost 5.331 seconds.\n", - "Prefix dict has been built successfully.\n", - "DEBUG:jieba:Prefix dict has been built successfully.\n" - ] - }, { "name": "stdout", "output_type": "stream", @@ -701,8 +694,10 @@ "

\n", "在本教程中,我们使用 `FastSpeech2` 作为声学模型。\n", "![FastSpeech2](source/fastspeech2.png)\n", + "\n", "PaddleSpeech TTS 实现的 FastSpeech2 与论文不同的地方在于,我们使用的的是 phone 级别的 `pitch` 和 `energy`(与 FastPitch 类似)。\n", "![FastPitch](source/fastpitch.png)\n", + "\n", "更多关于[声学模型的发展及改进](https://paddlespeech.readthedocs.io/en/latest/tts/models_introduction.html)。" ] }, @@ -1020,13 +1015,16 @@ "odim = fastspeech2_config.n_mels\n", "model = FastSpeech2(\n", " idim=vocab_size, odim=odim, **fastspeech2_config[\"model\"])\n", - "\n", - "model.set_state_dict(paddle.load(fastspeech2_checkpoint)[\"main_params\"]) # 加载预训练模型参数\n", - "model.eval() # 推理阶段不启用 batch norm 和 dropout\n", + "# 加载预训练模型参数\n", + "model.set_state_dict(paddle.load(fastspeech2_checkpoint)[\"main_params\"])\n", + "# 推理阶段不启用 batch norm 和 dropout\n", + "model.eval()\n", "stat = np.load(fastspeech2_stat)\n", - "mu, std = stat # 读取数据预处理阶段数据集的均值和标准差\n", + "# 读取数据预处理阶段数据集的均值和标准差\n", + "mu, std = stat\n", "mu, std = paddle.to_tensor(mu), paddle.to_tensor(std)\n", - "fastspeech2_normalizer = ZScore(mu, std) # 构造归一化的新模型\n", + "# 构造归一化的新模型\n", + "fastspeech2_normalizer = ZScore(mu, std)\n", "fastspeech2_inference = FastSpeech2Inference(fastspeech2_normalizer, model)\n", "fastspeech2_inference.eval()\n", "print(fastspeech2_inference)\n", @@ -1153,16 +1151,18 @@ ], "source": [ "vocoder = PWGGenerator(**pwg_config[\"generator_params\"])\n", - "\n", - "vocoder.set_state_dict(paddle.load(pwg_checkpoint)[\"generator_params\"]) # 模型加载预训练参数\n", + "# 模型加载预训练参数\n", + "vocoder.set_state_dict(paddle.load(pwg_checkpoint)[\"generator_params\"]) \n", "vocoder.remove_weight_norm()\n", - "vocoder.eval() # 推理阶段不启用 batch norm 和 dropout\n", - "\n", - "stat = np.load(pwg_stat) # 读取数据预处理阶段数据集的均值和标准差\n", + "# 推理阶段不启用 batch norm 和 dropout\n", + "vocoder.eval()\n", + "# 读取数据预处理阶段数据集的均值和标准差\n", + "stat = np.load(pwg_stat)\n", "mu, std = stat\n", "mu, std = paddle.to_tensor(mu), paddle.to_tensor(std)\n", "pwg_normalizer = ZScore(mu, std)\n", - "pwg_inference = PWGInference(pwg_normalizer, vocoder) # 构建归一化的模型\n", + "# 构建归一化的模型\n", + "pwg_inference = PWGInference(pwg_normalizer, vocoder)\n", "pwg_inference.eval()\n", "print(\"Parallel WaveGAN done!\")" ] @@ -1266,7 +1266,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 36, "metadata": {}, "outputs": [ {