diff --git a/.gitignore b/.gitignore
index 30aca84c..260c80bd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,3 +16,5 @@ tools/kenlm
 tools/sox-14.4.2
 tools/soxbindings
 tools/Montreal-Forced-Aligner/
+
+*output/
diff --git a/deepspeech/exps/deepspeech2/model.py b/deepspeech/exps/deepspeech2/model.py
index 5e659092..27d4f94c 100644
--- a/deepspeech/exps/deepspeech2/model.py
+++ b/deepspeech/exps/deepspeech2/model.py
@@ -34,9 +34,8 @@ from deepspeech.training.trainer import Trainer
 from deepspeech.utils import error_rate
 from deepspeech.utils import layer_tools
 from deepspeech.utils import mp_tools
-from deepspeech.utils.log import Log
 from deepspeech.utils.log import Autolog
-
+from deepspeech.utils.log import Log
 logger = Log(__name__).getlog()
 
 
@@ -226,8 +225,7 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
 
     def __init__(self, config, args):
         super().__init__(config, args)
-        self.autolog = Autolog(batch_size = config.decoding.batch_size, model_name = "deepspeech2", model_precision = "fp32").getlog()
-
+
     def ordid2token(self, texts, texts_len):
         """ ord() id to chr() chr """
         trans = []
@@ -294,6 +292,10 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
     @paddle.no_grad()
     def test(self):
         logger.info(f"Test Total Examples: {len(self.test_loader.dataset)}")
+        self.autolog = Autolog(
+            batch_size=self.config.decoding.batch_size,
+            model_name="deepspeech2",
+            model_precision="fp32").getlog()
         self.model.eval()
         cfg = self.config
         error_rate_type = None
diff --git a/deepspeech/utils/log.py b/deepspeech/utils/log.py
index d5522d8d..aefc8b59 100644
--- a/deepspeech/utils/log.py
+++ b/deepspeech/utils/log.py
@@ -18,11 +18,8 @@ import socket
 import sys
 
 import auto_log
-import os
 from paddle import inference
-
-
 FORMAT_STR = '[%(levelname)s %(asctime)s %(filename)s:%(lineno)d] %(message)s'
 DATE_FMT_STR = '%Y/%m/%d %H:%M:%S'
 
 
@@ -153,28 +150,29 @@ class Log():
     def getlog(self):
         return self.logger
 
-class Autolog:
-    def __init__(self, batch_size, model_name = "DeepSpeech", model_precision = "fp32"):
+class Autolog:
+    def __init__(self,
+                 batch_size,
+                 model_name="DeepSpeech",
+                 model_precision="fp32"):
         pid = os.getpid()
         gpu_id = int(os.environ['CUDA_VISIBLE_DEVICES'].split(',')[0])
         infer_config = inference.Config()
         infer_config.enable_use_gpu(100, gpu_id)
         autolog = auto_log.AutoLogger(
-            model_name = model_name,
-            model_precision = model_precision,
-            batch_size = batch_size,
+            model_name=model_name,
+            model_precision=model_precision,
+            batch_size=batch_size,
             data_shape="dynamic",
             save_path="./output/auto_log.lpg",
-            inference_config = infer_config,
-            pids = pid,
-            process_name = None,
-            gpu_ids = gpu_id,
-            time_keys=[
-                'preprocess_time', 'inference_time', 'postprocess_time'
-            ],
+            inference_config=infer_config,
+            pids=pid,
+            process_name=None,
+            gpu_ids=gpu_id,
+            time_keys=['preprocess_time', 'inference_time', 'postprocess_time'],
             warmup=0)
         self.autolog = autolog
-
+
     def getlog(self):
         return self.autolog
 
diff --git a/examples/dataset/timit/timit_kaldi_standard_split.py b/examples/dataset/timit/timit_kaldi_standard_split.py
index beb5a63e..2b494c06 100644
--- a/examples/dataset/timit/timit_kaldi_standard_split.py
+++ b/examples/dataset/timit/timit_kaldi_standard_split.py
@@ -50,36 +50,36 @@ def create_manifest(data_dir, manifest_path_prefix):
         total_text = 0.0
         total_num = 0
 
-        phn_path = os.path.join(data_dir, dtype+'.text')
+        phn_path = os.path.join(data_dir, dtype + '.text')
         phn_dict = {}
         for line in codecs.open(phn_path, 'r', 'utf-8'):
             line = line.strip()
             if line == '':
                 continue
             audio_id, text = line.split(' ', 1)
-            phn_dict[audio_id] = text
+            phn_dict[audio_id] = text
 
-        audio_dir = os.path.join(data_dir, dtype+'_sph.scp')
+        audio_dir = os.path.join(data_dir, dtype + '_sph.scp')
         for line in codecs.open(audio_dir, 'r', 'utf-8'):
-            audio_id, audio_path = line.strip().split()
-            # if no transcription for audio then raise error
-            assert audio_id in phn_dict
-            audio_data, samplerate = soundfile.read(audio_path)
-            duration = float(len(audio_data) / samplerate)
-            text = phn_dict[audio_id]
-            json_lines.append(
-                json.dumps(
-                    {
-                        'utt': audio_id,
-                        'feat': audio_path,
-                        'feat_shape': (duration, ),  # second
-                        'text': text
-                    },
-                    ensure_ascii=False))
-
-            total_sec += duration
-            total_text += len(text)
-            total_num += 1
+            audio_id, audio_path = line.strip().split()
+            # if no transcription for audio then raise error
+            assert audio_id in phn_dict
+            audio_data, samplerate = soundfile.read(audio_path)
+            duration = float(len(audio_data) / samplerate)
+            text = phn_dict[audio_id]
+            json_lines.append(
+                json.dumps(
+                    {
+                        'utt': audio_id,
+                        'feat': audio_path,
+                        'feat_shape': (duration, ),  # second
+                        'text': text
+                    },
+                    ensure_ascii=False))
+
+            total_sec += duration
+            total_text += len(text)
+            total_num += 1
 
         manifest_path = manifest_path_prefix + '.' + dtype + '.raw'
         with codecs.open(manifest_path, 'w', 'utf-8') as fout:
@@ -99,9 +99,7 @@ def main():
     if args.src_dir.startswith('~'):
         args.src_dir = os.path.expanduser(args.src_dir)
 
-    prepare_dataset(
-        src_dir=args.src_dir,
-        manifest_path=args.manifest_prefix)
+    prepare_dataset(src_dir=args.src_dir, manifest_path=args.manifest_prefix)
 
     print("manifest prepare done!")
 
diff --git a/examples/timit/s1/README.md b/examples/timit/s1/README.md
index 2dd8a719..4d9b146a 100644
--- a/examples/timit/s1/README.md
+++ b/examples/timit/s1/README.md
@@ -1,3 +1,3 @@
 # TIMIT
 
-Results will be organized and updated soon.
\ No newline at end of file
+Results will be organized and updated soon.
diff --git a/setup.sh b/setup.sh
index 0c602e61..384d62d2 100644
--- a/setup.sh
+++ b/setup.sh
@@ -44,18 +44,17 @@ if [ $? != 0 ]; then
 fi
 
 #install auto-log
-python3 -c "import auto_log"
+python -c "import auto_log"
if [ $? != 0 ]; then
     info_msg "Install auto_log into default system path"
-    git clone https://github.com/LDOUBLEV/AutoLog
+    test -d AutoLog || git clone https://github.com/LDOUBLEV/AutoLog
     if [ $? != 0 ]; then
         error_msg "Download auto_log failed !!!"
         exit 1
     fi
     cd AutoLog
-    pip3 install -r requirements.txt
-    python3 setup.py bdist_wheel
-    pip3 install ./dist/[Aa]uto*.whl
+    pip install -r requirements.txt
+    python setup.py install
     cd ..
     rm -rf AutoLog
 fi
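
Note on the relocated Autolog hook (commentary, not part of the patch): constructing the AutoLogger inside DeepSpeech2Tester.test() rather than in __init__ means the paddle inference.Config and the CUDA_VISIBLE_DEVICES lookup only run when testing is actually invoked, and the new *output/ ignore entry keeps the logger's save_path artifacts out of version control. For reference, below is a minimal sketch of how the returned logger is typically driven; the times.start()/stamp()/end() and report() calls follow the upstream LDOUBLEV/AutoLog timing API, while the loop body and variable names are illustrative rather than the exact deepspeech test loop.

    # Hypothetical consumer of the hook added above; only the
    # times.start()/stamp()/end(stamp=True) and report() calls are AutoLog API.
    for batch in self.test_loader:
        self.autolog.times.start()           # begin timing this batch
        # ... feature preprocessing ...
        self.autolog.times.stamp()           # record 'preprocess_time'
        # ... forward pass and decoding ...
        self.autolog.times.stamp()           # record 'inference_time'
        # ... error-rate computation ...
        self.autolog.times.end(stamp=True)   # record 'postprocess_time'
    self.autolog.report()                    # write collected stats to save_path

With the three time_keys declared in the patch, each start/stamp/stamp/end cycle attributes one batch's wall time to preprocess, inference, and postprocess buckets respectively.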