From ffb5756787450a8e808b246b4c92f51a4c3163b3 Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Sat, 17 Apr 2021 04:20:31 +0000 Subject: [PATCH] fix logger and cmvn --- deepspeech/__init__.py | 1 + deepspeech/frontend/normalizer.py | 11 ++++--- deepspeech/frontend/utility.py | 10 ------- deepspeech/utils/log.py | 38 +++++++++++-------------- examples/aishell/s0/local/data.sh | 3 +- examples/aishell/s1/conf/conformer.yaml | 4 +-- examples/tiny/s0/local/data.sh | 6 +++- examples/tiny/s1/conf/conformer.yaml | 4 +-- utils/format_data.py | 6 ++-- 9 files changed, 37 insertions(+), 46 deletions(-) diff --git a/deepspeech/__init__.py b/deepspeech/__init__.py index 0257dbe55..1c664aa3f 100644 --- a/deepspeech/__init__.py +++ b/deepspeech/__init__.py @@ -22,6 +22,7 @@ from paddle.fluid import core from paddle.nn import functional as F from deepspeech.utils.log import Log + #TODO(Hui Zhang): remove fluid import logger = Log(__name__).getlog() diff --git a/deepspeech/frontend/normalizer.py b/deepspeech/frontend/normalizer.py index f681b289b..cbb82b8e3 100644 --- a/deepspeech/frontend/normalizer.py +++ b/deepspeech/frontend/normalizer.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Contains feature normalizers.""" +import json import random import numpy as np @@ -22,12 +23,9 @@ from paddle.io import Dataset from deepspeech.frontend.audio import AudioSegment from deepspeech.frontend.utility import load_cmvn from deepspeech.frontend.utility import read_manifest -from deepspeech.utils.log import Log __all__ = ["FeatureNormalizer"] -logger = Log(__name__).getlog() - class CollateFunc(object): ''' Collate function for AudioDataset @@ -171,7 +169,8 @@ class FeatureNormalizer(object): collate_func = CollateFunc() - dataset = AudioDataset(manifest_path, featurize_func, num_samples) + dataset = AudioDataset(manifest_path, featurize_func, num_samples, + self._rng) batch_size = 20 data_loader = DataLoader( @@ -198,8 +197,8 @@ class FeatureNormalizer(object): wav_number += batch_size if wav_number % 1000 == 0: - logger.info('process {} wavs,{} frames'.format( - wav_number, int(all_number))) + print('process {} wavs,{} frames'.format(wav_number, + int(all_number))) self.cmvn_info = { 'mean_stat': list(all_mean_stat.tolist()), diff --git a/deepspeech/frontend/utility.py b/deepspeech/frontend/utility.py index 5a4989d62..b2dd9601f 100644 --- a/deepspeech/frontend/utility.py +++ b/deepspeech/frontend/utility.py @@ -235,14 +235,6 @@ def _load_kaldi_cmvn(kaldi_cmvn_file): return cmvn -def _load_npz_cmvn(npz_cmvn_file, eps=1e-20): - npzfile = np.load(npz_cmvn_file) - means = npzfile["mean"] #(1, D) - istd = npzfile["istd"] #(1, D) - cmvn = np.array([means, istd]) - return cmvn - - def load_cmvn(cmvn_file: str, filetype: str): """load cmvn from file. @@ -262,8 +254,6 @@ def load_cmvn(cmvn_file: str, filetype: str): cmvn = _load_json_cmvn(cmvn_file) elif filetype == "kaldi": cmvn = _load_kaldi_cmvn(cmvn_file) - elif filetype == "npz": - cmvn = _load_npz_cmvn(cmvn_file) else: raise ValueError(f"cmvn file type no support: {filetype}") return cmvn[0], cmvn[1] diff --git a/deepspeech/utils/log.py b/deepspeech/utils/log.py index 4f3ade870..1de59730f 100644 --- a/deepspeech/utils/log.py +++ b/deepspeech/utils/log.py @@ -16,7 +16,6 @@ import logging import os import socket import sys -import time def find_log_dir(log_dir=None): @@ -106,16 +105,13 @@ class Log(): actual_log_dir, file_prefix, symlink_prefix = find_log_dir_and_names( program_name=None, log_dir=self.log_dir) - basename = '%s.INFO.%s.%d' % ( - file_prefix, - time.strftime('%Y%m%d-%H%M', time.localtime(time.time())), - os.getpid()) + basename = '%s.DEBUG.%d' % (file_prefix, os.getpid()) filename = os.path.join(actual_log_dir, basename) if Log.log_name is None: Log.log_name = filename # Create a symlink to the log file with a canonical name. - symlink = os.path.join(actual_log_dir, symlink_prefix + '.INFO') + symlink = os.path.join(actual_log_dir, symlink_prefix + '.DEBUG') try: if os.path.islink(symlink): os.unlink(symlink) @@ -126,26 +122,26 @@ class Log(): # we can't modify it pass - fh = logging.FileHandler(Log.log_name) - fh.setLevel(logging.DEBUG) + if not self.logger.hasHandlers(): + format = '[%(levelname)s %(asctime)s %(filename)s:%(lineno)d] %(message)s' + formatter = logging.Formatter( + fmt=format, datefmt='%Y/%m/%d %H:%M:%S') + fh = logging.FileHandler(Log.log_name) + fh.setFormatter(formatter) + fh.setLevel(logging.DEBUG) + self.logger.addHandler(fh) - ch = logging.StreamHandler() - ch.setLevel(logging.INFO) + ch = logging.StreamHandler() + ch.setLevel(logging.INFO) + ch.setFormatter(formatter) + self.logger.addHandler(ch) - format = '[%(levelname)s %(asctime)s %(filename)s:%(lineno)d] %(message)s' - formatter = logging.Formatter(fmt=format, datefmt='%Y/%m/%d %H:%M:%S') - fh.setFormatter(formatter) - ch.setFormatter(formatter) - - self.logger.addHandler(fh) - self.logger.addHandler(ch) + #fh.close() + #ch.close() # stop propagate for propagating may print # log multiple times - # self.logger.propagate = False - - fh.close() - ch.close() + self.logger.propagate = False def getlog(self): return self.logger diff --git a/examples/aishell/s0/local/data.sh b/examples/aishell/s0/local/data.sh index 5af19e758..d29fb8bf3 100644 --- a/examples/aishell/s0/local/data.sh +++ b/examples/aishell/s0/local/data.sh @@ -42,8 +42,9 @@ python3 ${MAIN_ROOT}/utils/compute_mean_std.py \ --stride_ms=10.0 \ --window_ms=25.0 \ --sample_rate=16000 \ +--num_samples=2000 \ --num_workers=0 \ ---output_path="data/mean_std.npz" +--output_path="data/mean_std.json" if [ $? -ne 0 ]; then echo "Compute mean and stddev failed. Terminated." diff --git a/examples/aishell/s1/conf/conformer.yaml b/examples/aishell/s1/conf/conformer.yaml index 40c40629f..7c7d866fb 100644 --- a/examples/aishell/s1/conf/conformer.yaml +++ b/examples/aishell/s1/conf/conformer.yaml @@ -34,8 +34,8 @@ data: # network architecture model: - cmvn_file: "data/mean_std.npz" - cmvn_file_type: "npz" + cmvn_file: "data/mean_std.json" + cmvn_file_type: "json" # encoder related encoder: conformer encoder_conf: diff --git a/examples/tiny/s0/local/data.sh b/examples/tiny/s0/local/data.sh index c742aad54..234e87d28 100644 --- a/examples/tiny/s0/local/data.sh +++ b/examples/tiny/s0/local/data.sh @@ -43,7 +43,11 @@ python3 ${MAIN_ROOT}/utils/compute_mean_std.py \ --specgram_type="fbank" \ --feat_dim=80 \ --delta_delta=false \ ---output_path="data/mean_std.npz" +--sample_rate=16000 \ +--stride_ms=10.0 \ +--window_ms=25.0 \ +--num_workers=0 \ +--output_path="data/mean_std.json" if [ $? -ne 0 ]; then echo "Compute mean and stddev failed. Terminated." diff --git a/examples/tiny/s1/conf/conformer.yaml b/examples/tiny/s1/conf/conformer.yaml index 09f877470..e1ab02af2 100644 --- a/examples/tiny/s1/conf/conformer.yaml +++ b/examples/tiny/s1/conf/conformer.yaml @@ -74,8 +74,8 @@ data: # network architecture model: - cmvn_file: "data/mean_std.npz" - cmvn_file_type: "npz" + cmvn_file: "data/mean_std.json" + cmvn_file_type: "json" # encoder related encoder: conformer encoder_conf: diff --git a/utils/format_data.py b/utils/format_data.py index 235cc8cbe..9565da14a 100644 --- a/utils/format_data.py +++ b/utils/format_data.py @@ -27,7 +27,7 @@ add_arg = functools.partial(add_arguments, argparser=parser) # yapf: disable add_arg('feat_type', str, "raw", "speech feature type, e.g. raw(wav, flac), kaldi") add_arg('cmvn_path', str, - 'examples/librispeech/data/mean_std.npz', + 'examples/librispeech/data/mean_std.json', "Filepath of cmvn.") add_arg('unit_type', str, "char", "Unit type, e.g. char, word, spm") add_arg('vocab_path', str, @@ -52,8 +52,8 @@ def main(): fout = open(args.output_path, 'w', encoding='utf-8') # get feat dim - mean, std = load_cmvn(args.cmvn_path, filetype='npz') - feat_dim = mean.shape[1] #(1, D) + mean, std = load_cmvn(args.cmvn_path, filetype='json') + feat_dim = mean.shape[0] #(D) print(f"Feature dim: {feat_dim}") text_feature = TextFeaturizer(args.unit_type, args.vocab_path, args.spm_model_prefix)