fix speed, add setup, test=doc (#1415)

pull/1444/head
liangym 3 years ago committed by GitHub
parent 35738988b2
commit f86037e026
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -61,6 +61,9 @@ requirements = {
"visualdl", "visualdl",
"webrtcvad", "webrtcvad",
"yacs~=0.1.8", "yacs~=0.1.8",
# fastapi server
"fastapi",
"uvicorn",
], ],
"develop": [ "develop": [
"ConfigArgParse", "ConfigArgParse",

@ -11,4 +11,5 @@ port: 8090
################################################################## ##################################################################
# add engine type (Options: asr, tts) and config file here. # add engine type (Options: asr, tts) and config file here.
engine_backend: engine_backend:
asr: 'conf/asr/asr.yaml' asr: 'conf/asr/asr.yaml'
tts: 'conf/tts/tts.yaml'

@ -13,19 +13,18 @@
# limitations under the License. # limitations under the License.
import argparse import argparse
import base64 import base64
import os import io
import random
import librosa import librosa
import numpy as np import numpy as np
import soundfile as sf import soundfile as sf
import yaml import yaml
from engine.base_engine import BaseEngine from engine.base_engine import BaseEngine
from ffmpeg import audio from scipy.io import wavfile
from paddlespeech.cli.log import logger from paddlespeech.cli.log import logger
from paddlespeech.cli.tts.infer import TTSExecutor from paddlespeech.cli.tts.infer import TTSExecutor
from utils.audio_types import wav2pcm from utils.audio_process import change_speed
from utils.errors import ErrorCode from utils.errors import ErrorCode
from utils.exception import ServerBaseException from utils.exception import ServerBaseException
@ -107,26 +106,27 @@ class TTSEngine(BaseEngine):
wav_vol = wav_tar_fs * volume wav_vol = wav_tar_fs * volume
# transform speed # transform speed
hash = random.getrandbits(128) try: # windows not support soxbindings
temp_wav = str(hash) + ".wav" wav_speed = change_speed(wav_vol, speed, target_fs)
temp_speed_wav = str(hash + 1) + ".wav" except:
sf.write(temp_wav, wav_vol.reshape(-1, 1), target_fs) raise ServerBaseException(
audio.a_speed(temp_wav, speed, temp_speed_wav) ErrorCode.SERVER_INTERNAL_ERR,
os.system("rm %s" % (temp_wav)) "Can not install soxbindings on your system.")
# wav to base64 # wav to base64
with open(temp_speed_wav, 'rb') as f: buf = io.BytesIO()
base64_bytes = base64.b64encode(f.read()) wavfile.write(buf, target_fs, wav_speed)
wav_base64 = base64_bytes.decode('utf-8') base64_bytes = base64.b64encode(buf.read())
wav_base64 = base64_bytes.decode('utf-8')
# save audio # save audio
if audio_path is not None and audio_path.endswith(".wav"): if audio_path is not None and audio_path.endswith(".wav"):
os.system("mv %s %s" % (temp_speed_wav, audio_path)) sf.write(audio_path, wav_speed, target_fs)
elif audio_path is not None and audio_path.endswith(".pcm"): elif audio_path is not None and audio_path.endswith(".pcm"):
wav2pcm(temp_speed_wav, audio_path, data_type=np.int16) wav_norm = wav_speed * (32767 / max(0.001,
os.system("rm %s" % (temp_speed_wav)) np.max(np.abs(wav_speed))))
else: with open(audio_path, "wb") as f:
os.system("rm %s" % (temp_speed_wav)) f.write(wav_norm.astype(np.int16))
return target_fs, wav_base64 return target_fs, wav_base64

@ -0,0 +1,87 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import wave
import numpy as np
def wav2pcm(wavfile, pcmfile, data_type=np.int16):
    """Copy the sample payload of a WAV file into a headerless PCM file.

    The first 44 bytes of ``wavfile`` are skipped and everything after them
    is read as ``data_type`` items and written unchanged to ``pcmfile``.

    NOTE(review): the fixed 44-byte skip presumably targets the canonical
    PCM WAV header; files carrying extra chunks would need real header
    parsing -- confirm against the inputs callers actually pass.

    :param wavfile: Path of the WAV file to read.
    :param pcmfile: Path (or open file) handed to ``ndarray.tofile``.
    :param data_type: numpy dtype used to interpret the payload.
    """
    # Context manager so the input handle is released even if reading fails.
    with open(wavfile, "rb") as f:
        f.read(44)  # skip the header bytes
        data = np.fromfile(f, dtype=data_type)
    data.tofile(pcmfile)
def pcm2wav(pcm_file, wav_file, channels=1, bits=16, sample_rate=16000):
    """Wrap the raw bytes of a PCM file in a WAV container.

    :param pcm_file: Path of the headerless PCM file to read.
    :param wav_file: Destination passed to ``wave.open(..., 'wb')``.
    :param channels: Channel count written to the WAV header.
    :param bits: Bits per sample; must be a multiple of 8.
    :param sample_rate: Frame rate written to the WAV header.
    :raises ValueError: If ``bits`` is not a multiple of 8.
    """
    with open(pcm_file, 'rb') as pcmf:
        pcmdata = pcmf.read()

    if bits % 8 != 0:
        raise ValueError("bits % 8 must == 0. now bits:" + str(bits))

    # The context manager closes the writer even when one of the setters
    # rejects its argument, so the output handle is never leaked.
    with wave.open(wav_file, 'wb') as wav_out:
        wav_out.setnchannels(channels)
        wav_out.setsampwidth(bits // 8)
        wav_out.setframerate(sample_rate)
        wav_out.writeframes(pcmdata)
def change_speed(sample_raw, speed_rate, sample_rate):
    """Change the audio speed with the sox ``tempo`` effect.

    The input array is never modified. A result is always returned so that
    callers can use the return value unconditionally.

    :param sample_raw: Audio samples, passed to sox as ``input_array``.
    :param speed_rate: Rate of speed change:
                       speed_rate > 1.0, speed up the audio;
                       speed_rate = 1.0, unchanged;
                       speed_rate < 1.0, slow down the audio;
                       speed_rate <= 0.0, not allowed, raise ValueError.
    :type speed_rate: float
    :param sample_rate: Sampling rate of ``sample_raw``, passed to sox as
                        ``sample_rate_in``.
    :return: ``sample_raw`` itself when ``speed_rate == 1.0``; otherwise a
             new float32 array with the trailing axis of the sox output
             squeezed away.
    :raises ValueError: If speed_rate <= 0.0.
    :raises RuntimeError: If soxbindings can neither be imported nor
                          installed on the fly.
    """
    if speed_rate <= 0:
        raise ValueError("speed_rate should be greater than zero.")
    if speed_rate == 1.0:
        # Nothing to transform: hand the samples back instead of returning
        # None, which callers would otherwise try to write out as audio.
        return sample_raw

    # soxbindings is imported lazily because it is not installable on every
    # platform; fall back to installing it on demand.
    try:
        import soxbindings as sox
    except ImportError:
        try:
            from paddlespeech.s2t.utils import dynamic_pip_install
            dynamic_pip_install.install("sox")
            dynamic_pip_install.install("soxbindings")
            import soxbindings as sox
        except Exception as err:
            raise RuntimeError(
                "Can not install soxbindings on your system.") from err

    tfm = sox.Transformer()
    tfm.set_globals(multithread=False)
    tfm.tempo(speed_rate)
    sample_speed = tfm.build_array(
        input_array=sample_raw,
        sample_rate_in=sample_rate).squeeze(-1).astype(np.float32).copy()

    return sample_speed

@ -1,40 +0,0 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import wave
import numpy as np
def wav2pcm(wavfile, pcmfile, data_type=np.int16):
    """Strip the leading 44 bytes of a WAV file and dump the rest as PCM.

    Everything after the first 44 bytes of ``wavfile`` is read as
    ``data_type`` items and written unchanged to ``pcmfile``.

    NOTE(review): the 44-byte skip presumably matches the canonical PCM WAV
    header; verify for inputs that may carry additional chunks.

    :param wavfile: Path of the WAV file to read.
    :param pcmfile: Path (or open file) handed to ``ndarray.tofile``.
    :param data_type: numpy dtype used to interpret the payload.
    """
    # Close the input deterministically rather than leaking the handle.
    with open(wavfile, "rb") as f:
        f.read(44)  # skip the header bytes
        data = np.fromfile(f, dtype=data_type)
    data.tofile(pcmfile)
def pcm2wav(pcm_file, wav_file, channels=1, bits=16, sample_rate=16000):
    """Write the raw bytes of ``pcm_file`` into a WAV file at ``wav_file``.

    :param pcm_file: Path of the headerless PCM file to read.
    :param wav_file: Destination passed to ``wave.open(..., 'wb')``.
    :param channels: Channel count written to the WAV header.
    :param bits: Bits per sample; must be a multiple of 8.
    :param sample_rate: Frame rate written to the WAV header.
    :raises ValueError: If ``bits`` is not a multiple of 8.
    """
    with open(pcm_file, 'rb') as pcmf:
        pcmdata = pcmf.read()

    if bits % 8 != 0:
        raise ValueError("bits % 8 must == 0. now bits:" + str(bits))

    # Using the writer as a context manager guarantees it is closed even
    # when a header setter raises.
    with wave.open(wav_file, 'wb') as wav_out:
        wav_out.setnchannels(channels)
        wav_out.setsampwidth(bits // 8)
        wav_out.setframerate(sample_rate)
        wav_out.writeframes(pcmdata)
Loading…
Cancel
Save