Merge branch 'PaddlePaddle:develop' into develop

pull/2932/head
HuangLiangJie 3 years ago committed by GitHub
commit 859e8d2339

@@ -152,8 +152,7 @@ class WhisperExecutor(BaseExecutor):
         Init model and other resources from a specific path.
         """
         logger.debug("start to init the model")
-        # default max_len: unit:second
-        self.max_len = 50
         if hasattr(self, 'model'):
             logger.debug('Model had been initialized.')
             return
@@ -339,12 +338,6 @@ class WhisperExecutor(BaseExecutor):
         try:
             audio, audio_sample_rate = soundfile.read(
                 audio_file, dtype="int16", always_2d=True)
-            audio_duration = audio.shape[0] / audio_sample_rate
-            if audio_duration > self.max_len:
-                logger.error(
-                    f"Please input audio file less then {self.max_len} seconds.\n"
-                )
-                return False
         except Exception as e:
             logger.exception(e)
             logger.error(
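
For reference, a minimal standalone version of the audio-duration check shown in the hunk above: read the file with soundfile, derive the length in seconds, and compare it against a budget. The helper name `within_duration_limit` and the 50-second constant are illustrative assumptions, not PaddleSpeech API.

```python
import soundfile

# Illustrative cap in seconds, mirroring `self.max_len = 50` above.
MAX_LEN_SECONDS = 50


def within_duration_limit(audio_file: str, max_len: float=MAX_LEN_SECONDS) -> bool:
    """Return True if the clip is no longer than `max_len` seconds."""
    # always_2d=True yields shape (num_frames, num_channels) even for mono audio.
    audio, sample_rate = soundfile.read(
        audio_file, dtype="int16", always_2d=True)
    duration = audio.shape[0] / sample_rate  # frames / frames-per-second
    return duration <= max_len
```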

@@ -1,5 +1,5 @@
 # MIT License, Copyright (c) 2022 OpenAI.
-# Copyright (c) 2022 PaddlePaddle Authors and . All Rights Reserved.
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 #
 # Modified from OpenAI Whisper 2022 (https://github.com/openai/whisper/whisper/__init__.py)
 from paddlespeech.s2t.models.whisper.whipser import decode

@@ -1,5 +1,5 @@
 # MIT License, Copyright (c) 2022 OpenAI.
-# Copyright (c) 2022 PaddlePaddle Authors and . All Rights Reserved.
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 #
 # Modified from OpenAI Whisper 2022 (https://github.com/openai/whisper/whisper/tokenizer.py)
 import os

@@ -1,5 +1,5 @@
 # MIT License, Copyright (c) 2022 OpenAI.
-# Copyright (c) 2022 PaddlePaddle Authors and . All Rights Reserved.
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 #
 # Modified from OpenAI Whisper 2022 (https://github.com/openai/whisper/whisper/utils.py)
 import zlib

@@ -1,5 +1,5 @@
 # MIT License, Copyright (c) 2022 OpenAI.
-# Copyright (c) 2022 PaddlePaddle Authors and . All Rights Reserved.
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 #
 # Modified from OpenAI Whisper 2022 (https://github.com/openai/whisper/whisper)
 import os

@@ -360,6 +360,8 @@ class GaussianDiffusion(nn.Layer):
             num_inference_steps: Optional[int]=1000,
             strength: Optional[float]=None,
             scheduler_type: Optional[str]="ddpm",
+            clip_noise: Optional[bool]=True,
+            clip_noise_range: Optional[Tuple[float, float]]=(-1, 1),
             callback: Optional[Callable[[int, int, int, paddle.Tensor],
                                         None]]=None,
             callback_steps: Optional[int]=1):
@@ -380,6 +382,10 @@ class GaussianDiffusion(nn.Layer):
             scheduler_type (str, optional):
                 Noise scheduler for generate noises.
                 Choose a great scheduler can skip many denoising step, by default 'ddpm'.
+            clip_noise (bool, optional):
+                Whether to clip each denoised output, by default True.
+            clip_noise_range (tuple, optional):
+                denoised output min and max value range after clip, by default (-1, 1).
             callback (Callable[[int,int,int,Tensor], None], optional):
                 Callback function during denoising steps.
@@ -440,6 +446,9 @@ class GaussianDiffusion(nn.Layer):
         # denoising loop
         denoised_output = noisy_input
+        if clip_noise:
+            n_min, n_max = clip_noise_range
+            denoised_output = paddle.clip(denoised_output, n_min, n_max)
         num_warmup_steps = len(
             timesteps) - num_inference_steps * scheduler.order
         for i, t in enumerate(timesteps):
@@ -451,6 +460,8 @@ class GaussianDiffusion(nn.Layer):
             # compute the previous noisy sample x_t -> x_t-1
             denoised_output = scheduler.step(noise_pred, t,
                                              denoised_output).prev_sample
+            if clip_noise:
+                denoised_output = paddle.clip(denoised_output, n_min, n_max)
             # call the callback, if provided
             if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and
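
The hunks above thread new `clip_noise` / `clip_noise_range` arguments through `GaussianDiffusion.inference` and clip the sample before the loop and after every scheduler step. Below is a self-contained sketch of that clipping pattern using only `paddle`; `clipped_denoise_loop` and its `denoise_step` callable are stand-ins for the real model/scheduler pair, not the project's API.

```python
import paddle


def clipped_denoise_loop(noisy_input,
                         denoise_step,
                         num_steps=10,
                         clip_noise=True,
                         clip_noise_range=(-1.0, 1.0)):
    """Toy denoising loop that clips every intermediate sample."""
    n_min, n_max = clip_noise_range
    denoised = noisy_input
    if clip_noise:
        # Clip the starting noise into the allowed value range.
        denoised = paddle.clip(denoised, n_min, n_max)
    for t in range(num_steps):
        denoised = denoise_step(denoised, t)
        if clip_noise:
            # Clip again after each step, as the new arguments enable.
            denoised = paddle.clip(denoised, n_min, n_max)
    return denoised


# Usage with a stand-in "denoiser" that simply damps the sample.
sample = paddle.randn([1, 80, 100])  # a mel-spectrogram-shaped tensor
out = clipped_denoise_loop(sample, lambda x, t: 0.9 * x)
```

The default (-1, 1) range suggests the denoised features are expected to live in a normalized interval; clipping keeps each step's output from drifting outside it.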

@@ -129,7 +129,7 @@ decoders_module = [
 setup(
     name='paddlespeech_ctcdecoders',
-    version='0.2.0',
+    version='0.2.2',
     description="CTC decoders in paddlespeech",
     author="PaddlePaddle Speech and Language Team",
     author_email="paddlesl@baidu.com",
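
The last hunk only bumps the packaging metadata for the `paddlespeech_ctcdecoders` wheel. One way to confirm an installed build picked up the bump, using only the standard library (the expected string comes from the `version=` field above; this check is generic, not part of the package's own tooling):

```python
from importlib.metadata import version

# After rebuilding and reinstalling the wheel, the installed distribution
# should report the bumped version string.
print(version("paddlespeech_ctcdecoders"))  # expected: 0.2.2
```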
