fix speed, add setup, test=doc (#1415)

pull/1444/head
liangym 3 years ago committed by GitHub
parent 35738988b2
commit f86037e026
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -61,6 +61,9 @@ requirements = {
"visualdl",
"webrtcvad",
"yacs~=0.1.8",
# fastapi server
"fastapi",
"uvicorn",
],
"develop": [
"ConfigArgParse",

@ -11,4 +11,5 @@ port: 8090
##################################################################
# add engine type (Options: asr, tts) and config file here.
engine_backend:
asr: 'conf/asr/asr.yaml'
asr: 'conf/asr/asr.yaml'
tts: 'conf/tts/tts.yaml'

@ -13,19 +13,18 @@
# limitations under the License.
import argparse
import base64
import os
import random
import io
import librosa
import numpy as np
import soundfile as sf
import yaml
from engine.base_engine import BaseEngine
from ffmpeg import audio
from scipy.io import wavfile
from paddlespeech.cli.log import logger
from paddlespeech.cli.tts.infer import TTSExecutor
from utils.audio_types import wav2pcm
from utils.audio_process import change_speed
from utils.errors import ErrorCode
from utils.exception import ServerBaseException
@ -107,26 +106,27 @@ class TTSEngine(BaseEngine):
wav_vol = wav_tar_fs * volume
# transform speed
hash = random.getrandbits(128)
temp_wav = str(hash) + ".wav"
temp_speed_wav = str(hash + 1) + ".wav"
sf.write(temp_wav, wav_vol.reshape(-1, 1), target_fs)
audio.a_speed(temp_wav, speed, temp_speed_wav)
os.system("rm %s" % (temp_wav))
try: # windows not support soxbindings
wav_speed = change_speed(wav_vol, speed, target_fs)
except:
raise ServerBaseException(
ErrorCode.SERVER_INTERNAL_ERR,
"Can not install soxbindings on your system.")
# wav to base64
with open(temp_speed_wav, 'rb') as f:
base64_bytes = base64.b64encode(f.read())
wav_base64 = base64_bytes.decode('utf-8')
buf = io.BytesIO()
wavfile.write(buf, target_fs, wav_speed)
base64_bytes = base64.b64encode(buf.read())
wav_base64 = base64_bytes.decode('utf-8')
# save audio
if audio_path is not None and audio_path.endswith(".wav"):
os.system("mv %s %s" % (temp_speed_wav, audio_path))
sf.write(audio_path, wav_speed, target_fs)
elif audio_path is not None and audio_path.endswith(".pcm"):
wav2pcm(temp_speed_wav, audio_path, data_type=np.int16)
os.system("rm %s" % (temp_speed_wav))
else:
os.system("rm %s" % (temp_speed_wav))
wav_norm = wav_speed * (32767 / max(0.001,
np.max(np.abs(wav_speed))))
with open(audio_path, "wb") as f:
f.write(wav_norm.astype(np.int16))
return target_fs, wav_base64

@ -0,0 +1,87 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import wave
import numpy as np
def wav2pcm(wavfile, pcmfile, data_type=np.int16):
f = open(wavfile, "rb")
f.seek(0)
f.read(44)
data = np.fromfile(f, dtype=data_type)
data.tofile(pcmfile)
def pcm2wav(pcm_file, wav_file, channels=1, bits=16, sample_rate=16000):
pcmf = open(pcm_file, 'rb')
pcmdata = pcmf.read()
pcmf.close()
if bits % 8 != 0:
raise ValueError("bits % 8 must == 0. now bits:" + str(bits))
wavfile = wave.open(wav_file, 'wb')
wavfile.setnchannels(channels)
wavfile.setsampwidth(bits // 8)
wavfile.setframerate(sample_rate)
wavfile.writeframes(pcmdata)
wavfile.close()
def change_speed(sample_raw, speed_rate, sample_rate):
"""Change the audio speed by linear interpolation.
Note that this is an in-place transformation.
:param speed_rate: Rate of speed change:
speed_rate > 1.0, speed up the audio;
speed_rate = 1.0, unchanged;
speed_rate < 1.0, slow down the audio;
speed_rate <= 0.0, not allowed, raise ValueError.
:type speed_rate: float
:raises ValueError: If speed_rate <= 0.0.
"""
if speed_rate == 1.0:
return
if speed_rate <= 0:
raise ValueError("speed_rate should be greater than zero.")
# numpy
# old_length = self._samples.shape[0]
# new_length = int(old_length / speed_rate)
# old_indices = np.arange(old_length)
# new_indices = np.linspace(start=0, stop=old_length, num=new_length)
# self._samples = np.interp(new_indices, old_indices, self._samples)
# sox, slow
try:
import soxbindings as sox
except ImportError:
try:
from paddlespeech.s2t.utils import dynamic_pip_install
package = "sox"
dynamic_pip_install.install(package)
package = "soxbindings"
dynamic_pip_install.install(package)
import soxbindings as sox
except Exception:
raise RuntimeError("Can not install soxbindings on your system.")
tfm = sox.Transformer()
tfm.set_globals(multithread=False)
tfm.tempo(speed_rate)
sample_speed = tfm.build_array(
input_array=sample_raw,
sample_rate_in=sample_rate).squeeze(-1).astype(np.float32).copy()
return sample_speed

@ -1,40 +0,0 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import wave
import numpy as np
def wav2pcm(wavfile, pcmfile, data_type=np.int16):
f = open(wavfile, "rb")
f.seek(0)
f.read(44)
data = np.fromfile(f, dtype=data_type)
data.tofile(pcmfile)
def pcm2wav(pcm_file, wav_file, channels=1, bits=16, sample_rate=16000):
pcmf = open(pcm_file, 'rb')
pcmdata = pcmf.read()
pcmf.close()
if bits % 8 != 0:
raise ValueError("bits % 8 must == 0. now bits:" + str(bits))
wavfile = wave.open(wav_file, 'wb')
wavfile.setnchannels(channels)
wavfile.setsampwidth(bits // 8)
wavfile.setframerate(sample_rate)
wavfile.writeframes(pcmdata)
wavfile.close()
Loading…
Cancel
Save