add audio utils

pull/556/head
Hui Zhang 5 years ago
parent 7635f98bce
commit b769579eaf

@ -13,6 +13,8 @@
# limitations under the License. # limitations under the License.
"""Contains data helper functions.""" """Contains data helper functions."""
import numpy as np
import math
import json import json
import codecs import codecs
import os import os
@ -50,3 +52,85 @@ def read_manifest(manifest_path, max_duration=float('inf'), min_duration=0.0):
json_data["duration"] >= min_duration): json_data["duration"] >= min_duration):
manifest.append(json_data) manifest.append(json_data)
return manifest return manifest
def rms_to_db(rms: float):
    """Convert a root-mean-square amplitude to decibels.

    Args:
        rms (float): root mean square amplitude.

    Returns:
        float: level in dB, i.e. ``20 * log10(rms)`` with ``rms`` floored
            at 1e-16.
    """
    # Floor the amplitude so log10 never sees zero or a negative value;
    # anything at or below 1e-16 therefore maps to -320 dB.
    floored = rms if rms > 1e-16 else 1e-16
    return math.log10(floored) * 20.0
def rms_to_dbfs(rms: float):
    """Convert a root-mean-square amplitude to dBFS.

    Reference:
    https://fireattack.wordpress.com/2017/02/06/replaygain-loudness-normalization-and-applications/

    Audio is treated as a mix of sine waves. A sine wave of amplitude 1 has
    a full-scale RMS of 0.7071, which equals -3.0103 dB, hence:

        dB   = dBFS + 3.0103
        dBFS = dB - 3.0103

    e.g. 0 dB = -3.0103 dBFS.

    Args:
        rms (float): root mean square amplitude.

    Returns:
        float: level in dBFS.
    """
    level_db = rms_to_db(rms)
    # Shift from dB to dBFS using the fixed sine-wave offset described above.
    return level_db - 3.0103
def max_dbfs(sample_data: np.ndarray) -> float:
    """Peak dBFS based on the maximum-magnitude sample.

    The largest absolute sample value is passed through ``rms_to_dbfs``, so
    the result includes that function's fixed -3.0103 offset. Using the peak
    (rather than the mean energy) as the reference prevents overdrive when
    the value is used for normalization.

    Args:
        sample_data (np.ndarray): float array, expected in [-1, 1].

    Returns:
        float: peak level in dBFS.
    """
    # The largest magnitude is either the most negative or the most
    # positive sample.
    peak = max(abs(np.min(sample_data)), abs(np.max(sample_data)))
    return rms_to_dbfs(peak)
def mean_dbfs(sample_data):
    """Mean dBFS based on the RMS energy of the samples.

    Args:
        sample_data (np.ndarray): float array, expected in [-1, 1].

    Returns:
        float: RMS level in dBFS.
    """
    # Square in float64 so the accumulation is done in double precision.
    mean_square = np.mean(np.square(sample_data, dtype=np.float64))
    rms = math.sqrt(mean_square)
    return rms_to_dbfs(rms)
def gain_db_to_ratio(gain_db: float):
    """Convert a gain in dB to a linear amplitude ratio.

    Args:
        gain_db (float): gain in dB.

    Returns:
        float: amplitude scale factor, ``10 ** (gain_db / 20)``.
    """
    # 20 dB corresponds to one decade of amplitude.
    exponent = gain_db / 20.0
    return math.pow(10.0, exponent)
def normalize_audio(sample_data: np.ndarray, dbfs: float=-3.0103):
    """Normalize audio to a target peak dBFS.

    The samples are scaled by the gain needed to move the current peak
    level (as reported by ``max_dbfs``) to ``dbfs``, then clipped to
    [-1, 1].

    Args:
        sample_data (np.ndarray): input wave samples, expected in [-1, 1].
        dbfs (float, optional): target dBFS. Defaults to -3.0103.

    Returns:
        np.ndarray: normalized wave, limited to [-1, 1].
    """
    # Gain (as an amplitude ratio) that moves the current peak to the target.
    gain = gain_db_to_ratio(dbfs - max_dbfs(sample_data))
    # Clip so a target above full scale cannot produce out-of-range samples.
    return np.clip(sample_data * gain, -1.0, 1.0)

@ -133,7 +133,7 @@ class ConformerEncoderLayer(nn.Layer):
def __init__( def __init__(
self, self,
size: int, size: int,
self_attn: int, self_attn: nn.Layer,
feed_forward: Optional[nn.Layer]=None, feed_forward: Optional[nn.Layer]=None,
feed_forward_macaron: Optional[nn.Layer]=None, feed_forward_macaron: Optional[nn.Layer]=None,
conv_module: Optional[nn.Layer]=None, conv_module: Optional[nn.Layer]=None,

@ -110,4 +110,4 @@ def log_add(args: List[int]) -> float:
return -float('inf') return -float('inf')
a_max = max(args) a_max = max(args)
lsp = math.log(sum(math.exp(a - a_max) for a in args)) lsp = math.log(sum(math.exp(a - a_max) for a in args))
return a_max + lsp return a_max + lsp

@ -6,3 +6,4 @@ tensorboardX
yacs yacs
typeguard typeguard
pre-commit pre-commit
paddlepaddle-gpu==2.0.0

Loading…
Cancel
Save