pull/578/head
Hui Zhang 4 years ago
parent 2aed275233
commit 1635e000b3

@ -22,9 +22,12 @@ from paddle.io import Dataset
from deepspeech.frontend.audio import AudioSegment from deepspeech.frontend.audio import AudioSegment
from deepspeech.frontend.utility import load_cmvn from deepspeech.frontend.utility import load_cmvn
from deepspeech.frontend.utility import read_manifest from deepspeech.frontend.utility import read_manifest
from deepspeech.utils.log import Log
__all__ = ["FeatureNormalizer"] __all__ = ["FeatureNormalizer"]
logger = Log(__name__).getlog()
# https://github.com/PaddlePaddle/Paddle/pull/31481 # https://github.com/PaddlePaddle/Paddle/pull/31481
class CollateFunc(object): class CollateFunc(object):
@ -176,7 +179,7 @@ class FeatureNormalizer(object):
wav_number += batch_size wav_number += batch_size
if wav_number % 1000 == 0: if wav_number % 1000 == 0:
print('process {} wavs,{} frames'.format(wav_number, logger.info('process {} wavs,{} frames'.format(wav_number,
all_number)) all_number))
self.cmvn_info = { self.cmvn_info = {

@ -17,6 +17,12 @@ import os
import socket import socket
import sys import sys
FORMAT_STR = '[%(levelname)s %(asctime)s %(filename)s:%(lineno)d] %(message)s'
DATE_FMT_STR = '%Y/%m/%d %H:%M:%S'
logging.basicConfig(
level=logging.DEBUG, format=FORMAT_STR, datefmt=DATE_FMT_STR)
def find_log_dir(log_dir=None): def find_log_dir(log_dir=None):
"""Returns the most suitable directory to put log files into. """Returns the most suitable directory to put log files into.
@ -123,12 +129,10 @@ class Log():
pass pass
if not self.logger.hasHandlers(): if not self.logger.hasHandlers():
format = '[%(levelname)s %(asctime)s %(filename)s:%(lineno)d] %(message)s' formatter = logging.Formatter(fmt=FORMAT_STR, datefmt=DATE_FMT_STR)
formatter = logging.Formatter(
fmt=format, datefmt='%Y/%m/%d %H:%M:%S')
fh = logging.FileHandler(Log.log_name) fh = logging.FileHandler(Log.log_name)
fh.setFormatter(formatter)
fh.setLevel(logging.DEBUG) fh.setLevel(logging.DEBUG)
fh.setFormatter(formatter)
self.logger.addHandler(fh) self.logger.addHandler(fh)
ch = logging.StreamHandler() ch = logging.StreamHandler()
@ -136,9 +140,6 @@ class Log():
ch.setFormatter(formatter) ch.setFormatter(formatter)
self.logger.addHandler(ch) self.logger.addHandler(ch)
#fh.close()
#ch.close()
# stop propagate for propagating may print # stop propagate for propagating may print
# log multiple times # log multiple times
self.logger.propagate = False self.logger.propagate = False

@ -51,6 +51,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
--stride_ms=10.0 \ --stride_ms=10.0 \
--window_ms=25.0 \ --window_ms=25.0 \
--sample_rate=16000 \ --sample_rate=16000 \
--use_dB_normalization=False \
--num_samples=-1 \ --num_samples=-1 \
--num_workers=16 \ --num_workers=16 \
--output_path="data/mean_std.json" --output_path="data/mean_std.json"

@ -73,6 +73,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
--sample_rate=16000 \ --sample_rate=16000 \
--stride_ms=10.0 \ --stride_ms=10.0 \
--window_ms=25.0 \ --window_ms=25.0 \
--use_dB_normalization=False \
--num_workers=${num_workers} \ --num_workers=${num_workers} \
--output_path="data/mean_std.json" --output_path="data/mean_std.json"

@ -57,6 +57,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
--sample_rate=16000 \ --sample_rate=16000 \
--stride_ms=10.0 \ --stride_ms=10.0 \
--window_ms=25.0 \ --window_ms=25.0 \
--use_dB_normalization=False \
--num_workers=2 \ --num_workers=2 \
--output_path="data/mean_std.json" --output_path="data/mean_std.json"

@ -21,6 +21,8 @@ import paddle
def main(args): def main(args):
paddle.set_device('cpu')
val_scores = [] val_scores = []
beat_val_scores = [] beat_val_scores = []
selected_epochs = [] selected_epochs = []

@ -25,17 +25,19 @@ parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser) add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable # yapf: disable
add_arg('num_samples', int, -1, "# of samples to for statistics.") add_arg('num_samples', int, -1, "# of samples to for statistics.")
add_arg('specgram_type', str, add_arg('specgram_type', str,
'linear', 'linear',
"Audio feature type. Options: linear, mfcc, fbank.", "Audio feature type. Options: linear, mfcc, fbank.",
choices=['linear', 'mfcc', 'fbank']) choices=['linear', 'mfcc', 'fbank'])
add_arg('feat_dim', int, 13, "Audio feature dim.") add_arg('feat_dim', int, 13, "Audio feature dim.")
add_arg('delta_delta', bool, add_arg('delta_delta', bool, False, "Audio feature with delta delta.")
False,
"Audio feature with delta delta.")
add_arg('stride_ms', float, 10.0, "stride length in ms.") add_arg('stride_ms', float, 10.0, "stride length in ms.")
add_arg('window_ms', float, 20.0, "stride length in ms.") add_arg('window_ms', float, 20.0, "stride length in ms.")
add_arg('sample_rate', int, 16000, "target sample rate.") add_arg('sample_rate', int, 16000, "target sample rate.")
add_arg('use_dB_normalization', bool, False, "do dB normalization.")
add_arg('target_dB', int, -20, "target dB.")
add_arg('manifest_path', str, add_arg('manifest_path', str,
'data/librispeech/manifest.train', 'data/librispeech/manifest.train',
"Filepath of manifest to compute normalizer's mean and stddev.") "Filepath of manifest to compute normalizer's mean and stddev.")
@ -63,8 +65,8 @@ def main():
n_fft=None, n_fft=None,
max_freq=None, max_freq=None,
target_sample_rate=args.sample_rate, target_sample_rate=args.sample_rate,
use_dB_normalization=True, use_dB_normalization=args.use_dB_normalization,
target_dB=-20, target_dB=args.target_dB,
dither=0.0) dither=0.0)
def augment_and_featurize(audio_segment): def augment_and_featurize(audio_segment):

Loading…
Cancel
Save