pull/3900/head
drryanhuang 9 months ago
parent 1c02ee4dc5
commit a5f00db532

@ -1,12 +1,12 @@
__version__ = "0.0.1"
from .core import AudioSignal
from .core import STFTParams
from .core import Meter
from .core import util
from .core import highpass_filter, highpass_filters
from . import metrics
from . import data
from . import metrics
from . import ml
from . import post
from .core import AudioSignal
from .core import highpass_filter
from .core import highpass_filters
from .core import Meter
from .core import STFTParams
from .core import util
from .data import datasets
from .data import transforms

@ -15,14 +15,14 @@ from typing import Sequence
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
sys.path.append("/home/aistudio/PaddleSpeech")
from paddlespeech.t2s.modules import fft_conv1d
from paddlespeech.t2s.modules import FFTConv1D
from paddlespeech.utils import satisfy_paddle_version
# Names exported by `from <this module> import *`.
# Every entry is listed exactly once and separated by a comma: adjacent string
# literals with no comma between them are silently concatenated by Python,
# which would yield a bogus export such as 'SplitBandshighpass_filter'.
# NOTE(review): `fft_conv1d` and `FFTConv1D` are imported above from
# `paddlespeech.t2s.modules` rather than defined here; they are kept in the
# list so existing star-imports keep working — confirm they should stay public.
__all__ = [
    'fft_conv1d',
    'FFTConv1D',
    'highpass_filter',
    'highpass_filters',
    'lowpass_filter',
    'LowPassFilter',
    'LowPassFilters',
    'pure_tone',
    'resample_frac',
    'split_bands',
    'SplitBands',
]
@ -61,6 +61,9 @@ def sinc(x: paddle.Tensor):
__Warning__: the input is not multiplied by `pi`!
"""
if satisfy_paddle_version("2.6"):
return paddle.sinc(x)
return paddle.where(
x == 0,
paddle.to_tensor(1.0, dtype=x.dtype, place=x.place),
@ -103,7 +106,7 @@ class ResampleFrac(paddle.nn.Layer):
>>> print(len(resample(x)))
1250
"""
super(ResampleFrac, self).__init__()
super().__init__()
if not isinstance(old_sr, int) or not isinstance(new_sr, int):
raise ValueError("old_sr and new_sr should be integers")
gcd = math.gcd(old_sr, new_sr)
@ -257,7 +260,7 @@ class LowPassFilters(nn.Layer):
zeros: float=8,
fft: Optional[bool]=None,
dtype="float32"):
super(LowPassFilters, self).__init__()
super().__init__()
self.cutoffs = list(cutoffs)
if min(self.cutoffs) < 0:
raise ValueError("Minimum cutoff must be larger than zero.")
@ -325,7 +328,7 @@ class LowPassFilter(nn.Layer):
pad: bool=True,
zeros: float=8,
fft: Optional[bool]=None):
super(LowPassFilter, self).__init__()
super().__init__()
self._lowpasses = LowPassFilters([cutoff], stride, pad, zeros, fft)
@property
@ -583,7 +586,7 @@ class SplitBands(paddle.nn.Layer):
pad: bool=True,
zeros: float=8,
fft: Optional[bool]=None, ):
super(SplitBands, self).__init__()
super().__init__()
if (cutoffs is None) + (n_bands is None) != 1:
raise ValueError(
"You must provide either n_bands, or cutoffs, but not both.")

@ -587,7 +587,7 @@ class AudioSignal(
self.original_signal_length = self.signal_length
self.sample_rate = sample_rate
# return self.to(device)
return self
def write(self, audio_path: typing.Union[str, Path]):
@ -1198,7 +1198,6 @@ class AudioSignal(
padding_type = self.stft_params.padding_type if padding_type is None else padding_type
window = self.get_window(window_type, window_length)
# window = window.to(self.audio_data.device)
audio_data = self.audio_data
right_pad, pad = self.compute_stft_padding(window_length, hop_length,
@ -1362,6 +1361,26 @@ class AudioSignal(
paddle.Tensor [shape=(batch, channels, mels, time)]
Mel spectrogram.
"""
# from paddle.audio.compliance.librosa import melspectrogram
# # from ..compliance.librosa import melspectrogram
# return melspectrogram(
# x=self.audio_data,
# sr=self.sample_rate,
# window_size: int=512,
# hop_length: int=320,
# n_mels: int=64,
# fmin: float=50.0,
# fmax: Optional[float]=None,
# window: str='hann',
# center: bool=True,
# pad_mode: str='reflect',
# power: float=2.0,
# to_db: bool=True,
# ref: float=1.0,
# amin: float=1e-10,
# top_db: Optional[float]=None
# )
stft = self.stft(**kwargs)
magnitude = paddle.abs(stft)
@ -1429,6 +1448,9 @@ class AudioSignal(
MFCCs.
"""
# from paddle.audio.compliance.librosa import mfcc
# return mfcc(self.audio_data, self.sample_rate, n_mfcc=n_mfcc, n_mels=n_mels)
mel_spectrogram = self.mel_spectrogram(n_mels, **kwargs)
mel_spectrogram = paddle.log(mel_spectrogram + log_offset)
dct_mat = self.get_dct(n_mfcc, n_mels, "ortho", self.device)

Loading…
Cancel
Save