You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
108 lines
3.6 KiB
108 lines
3.6 KiB
2 years ago
|
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||
|
#
|
||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
# you may not use this file except in compliance with the License.
|
||
|
# You may obtain a copy of the License at
|
||
|
#
|
||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||
|
#
|
||
|
# Unless required by applicable law or agreed to in writing, software
|
||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
# See the License for the specific language governing permissions and
|
||
|
# limitations under the License.
|
||
|
from typing import Union
|
||
|
|
||
|
import numpy as np
|
||
|
|
||
|
__all__ = ["pcm16to32", "depth_convert"]
|
||
|
|
||
|
|
||
|
def pcm16to32(audio: np.ndarray) -> np.ndarray:
    """Scale 16-bit PCM samples to float32.

    Args:
        audio (np.ndarray): Waveform, expected to have dtype int16.

    Returns:
        np.ndarray: For int16 input, a float32 waveform divided by 2**15.
            Input of any other dtype is returned unchanged (same object).
    """
    # Anything that is not int16 PCM is passed through untouched.
    if audio.dtype != np.int16:
        return audio

    # 2**(16 - 1): magnitude of the most negative int16 sample.
    full_scale = 2**(np.iinfo(np.int16).bits - 1)
    return audio.astype("float32") / full_scale
|
||
|
|
||
|
|
||
|
def _safe_cast(y: np.ndarray, dtype: Union[type, str]) -> np.ndarray:
    """Cast `y` to `dtype`, clipping first so the cast cannot overflow.

    Args:
        y (np.ndarray): Input waveform array.
        dtype (Union[type, str]): Target data type (floating or integer).

    Returns:
        np.ndarray: `y` clipped to the representable range of `dtype` and
            converted to `dtype`.

    Raises:
        ValueError: If `dtype` is neither a floating nor an integer type
            (raised by `np.iinfo`).
    """
    target = np.dtype(dtype)
    # The clipping limits must come from the *target* type: choosing
    # finfo/iinfo from the input dtype fails as soon as the input and the
    # target are of different kinds (float -> int or int -> float).
    if np.issubdtype(target, np.floating):
        limits = np.finfo(target)
    else:
        limits = np.iinfo(target)
    return np.clip(y, limits.min, limits.max).astype(target)
|
||
|
|
||
|
|
||
|
def depth_convert(y: np.ndarray, dtype: Union[type, str]) -> np.ndarray:
    """Convert audio array to target dtype safely.

    This function converts an audio waveform to a target dtype, with the
    additional steps of preventing overflow/underflow and preserving the
    audio range (the signal is rescaled between the integer full scale and
    the floating point range).

    Args:
        y (np.ndarray): Input waveform array in 1D or 2D.
        dtype (Union[type, str]): Target data type of the waveform. One of
            'int16', 'int8', 'float32', 'float64', given as a string, a
            type or a `np.dtype`.

    Returns:
        np.ndarray: `y` converted to `dtype`. If `y` already has the target
            dtype, `y` itself is returned.

    Raises:
        ParameterError: If `y.dtype` or `dtype` is not supported. When the
            module does not define `ParameterError`, `ValueError` is raised
            instead.
    """

    SUPPORT_DTYPE = ['int16', 'int8', 'float32', 'float64']

    # `ParameterError` is not defined or imported in this module as seen;
    # use it when it is available and fall back to ValueError so that
    # invalid input is reported properly instead of failing with NameError.
    error_cls = globals().get('ParameterError', ValueError)

    if y.dtype not in SUPPORT_DTYPE:
        raise error_cls(
            'Unsupported audio dtype, '
            f'y.dtype is {y.dtype}, supported dtypes are {SUPPORT_DTYPE}')

    # Normalise the target so that strings, types (e.g. np.int16) and
    # np.dtype instances are all handled alike. None is rejected explicitly
    # because np.dtype(None) silently means float64.
    try:
        target = None if dtype is None else np.dtype(dtype)
    except (TypeError, ValueError):
        target = None
    if target is None or target not in SUPPORT_DTYPE:
        raise error_cls(
            'Unsupported audio dtype, '
            f'target dtype is {dtype}, supported dtypes are {SUPPORT_DTYPE}')

    if target == y.dtype:
        return y

    source_is_float = y.dtype.kind == 'f'
    target_is_float = target.kind == 'f'

    if target_is_float:
        if source_is_float:
            # float32 <-> float64: clip to the target range, then cast.
            return _safe_cast(y, target)
        # int -> float: divide by the integer full scale of the source.
        return y.astype(target) / np.iinfo(y.dtype).max

    target_info = np.iinfo(target)
    if source_is_float:
        # float -> int: scale to the integer full scale of the target.
        scaled = y * target_info.max
    elif target == 'int16':
        # int8 -> int16. The most negative int8 sample scales to about
        # -33025, which does not fit in int16; the clip below handles it.
        scaled = y.astype('float32') * (
            target_info.max / np.iinfo(y.dtype).max)
    else:
        # int16 -> int8; int32 keeps the intermediate product exact.
        scaled = y.astype('int32') * target_info.max / np.iinfo(y.dtype).max

    # Clip before the cast so out-of-range samples saturate, not wrap.
    return np.clip(scaled, target_info.min, target_info.max).astype(target)
|