diff --git a/deepspeech/exps/deepspeech2/bin/deploy/runtime.py b/deepspeech/exps/deepspeech2/bin/deploy/runtime.py index 737d6432a..8212fd864 100644 --- a/deepspeech/exps/deepspeech2/bin/deploy/runtime.py +++ b/deepspeech/exps/deepspeech2/bin/deploy/runtime.py @@ -168,7 +168,7 @@ if __name__ == "__main__": default=False, help="Whether use gpu.") args = parser.parse_args() - print_arguments(args) + print_arguments(args, globals()) # https://yaml.org/type/float.html config = get_cfg_defaults() @@ -180,7 +180,7 @@ if __name__ == "__main__": print(config) args.warmup_manifest = config.data.test_manifest - print_arguments(args) + print_arguments(args, globals()) if args.dump_config: with open(args.dump_config, 'w') as f: diff --git a/deepspeech/exps/deepspeech2/bin/deploy/server.py b/deepspeech/exps/deepspeech2/bin/deploy/server.py index ff545b196..89023c85e 100644 --- a/deepspeech/exps/deepspeech2/bin/deploy/server.py +++ b/deepspeech/exps/deepspeech2/bin/deploy/server.py @@ -98,7 +98,7 @@ if __name__ == "__main__": "Directory to save demo audios.") add_arg('warmup_manifest', str, None, "Filepath of manifest to warm up.") args = parser.parse_args() - print_arguments(args) + print_arguments(args, globals()) # https://yaml.org/type/float.html config = get_cfg_defaults() @@ -110,7 +110,7 @@ if __name__ == "__main__": print(config) args.warmup_manifest = config.data.test_manifest - print_arguments(args) + print_arguments(args, globals()) if args.dump_config: with open(args.dump_config, 'w') as f: diff --git a/deepspeech/exps/deepspeech2/bin/infer.py b/deepspeech/exps/deepspeech2/bin/infer.py index 815ca833c..9dda38513 100644 --- a/deepspeech/exps/deepspeech2/bin/infer.py +++ b/deepspeech/exps/deepspeech2/bin/infer.py @@ -33,7 +33,7 @@ def main(config, args): if __name__ == "__main__": parser = default_argument_parser() args = parser.parse_args() - print_arguments(args) + print_arguments(args, globals()) # https://yaml.org/type/float.html config = get_cfg_defaults() diff --git a/deepspeech/exps/deepspeech2/bin/test.py b/deepspeech/exps/deepspeech2/bin/test.py index 090ee0de6..0f5ec4000 100644 --- a/deepspeech/exps/deepspeech2/bin/test.py +++ b/deepspeech/exps/deepspeech2/bin/test.py @@ -32,7 +32,7 @@ def main(config, args): if __name__ == "__main__": parser = default_argument_parser() args = parser.parse_args() - print_arguments(args) + print_arguments(args, globals()) # https://yaml.org/type/float.html config = get_cfg_defaults() diff --git a/deepspeech/exps/deepspeech2/bin/train.py b/deepspeech/exps/deepspeech2/bin/train.py index f2df5fa9c..ef43d793a 100644 --- a/deepspeech/exps/deepspeech2/bin/train.py +++ b/deepspeech/exps/deepspeech2/bin/train.py @@ -37,7 +37,7 @@ def main(config, args): if __name__ == "__main__": parser = default_argument_parser() args = parser.parse_args() - print_arguments(args) + print_arguments(args, globals()) # https://yaml.org/type/float.html config = get_cfg_defaults() diff --git a/deepspeech/exps/deepspeech2/bin/tune.py b/deepspeech/exps/deepspeech2/bin/tune.py index dafa6e041..5efb433fa 100644 --- a/deepspeech/exps/deepspeech2/bin/tune.py +++ b/deepspeech/exps/deepspeech2/bin/tune.py @@ -168,7 +168,7 @@ if __name__ == "__main__": add_arg('cutoff_top_n', int, 40, "Cutoff number for pruning.") args = parser.parse_args() - print_arguments(args) + print_arguments(args, globals()) # https://yaml.org/type/float.html config = get_cfg_defaults() diff --git a/deepspeech/exps/u2/bin/export.py b/deepspeech/exps/u2/bin/export.py index a97378144..1fa0799ee 100644 --- a/deepspeech/exps/u2/bin/export.py +++ b/deepspeech/exps/u2/bin/export.py @@ -33,7 +33,7 @@ def main(config, args): if __name__ == "__main__": parser = default_argument_parser() args = parser.parse_args() - print_arguments(args) + print_arguments(args, globals()) # https://yaml.org/type/float.html config = get_cfg_defaults() diff --git a/deepspeech/exps/u2/bin/test.py b/deepspeech/exps/u2/bin/test.py index dfde68e1a..7d69c392a 100644 --- a/deepspeech/exps/u2/bin/test.py +++ b/deepspeech/exps/u2/bin/test.py @@ -34,7 +34,7 @@ def main(config, args): if __name__ == "__main__": parser = default_argument_parser() args = parser.parse_args() - print_arguments(args) + print_arguments(args, globals()) # https://yaml.org/type/float.html config = get_cfg_defaults() diff --git a/deepspeech/exps/u2/bin/train.py b/deepspeech/exps/u2/bin/train.py index 0e1f40250..5afabbb81 100644 --- a/deepspeech/exps/u2/bin/train.py +++ b/deepspeech/exps/u2/bin/train.py @@ -38,7 +38,7 @@ def main(config, args): if __name__ == "__main__": parser = default_argument_parser() args = parser.parse_args() - print_arguments(args) + print_arguments(args, globals()) # https://yaml.org/type/float.html config = get_cfg_defaults() diff --git a/deepspeech/frontend/featurizer/text_featurizer.py b/deepspeech/frontend/featurizer/text_featurizer.py index d70f88f44..a704e37ef 100644 --- a/deepspeech/frontend/featurizer/text_featurizer.py +++ b/deepspeech/frontend/featurizer/text_featurizer.py @@ -34,11 +34,12 @@ class TextFeaturizer(object): """ assert unit_type in ('char', 'spm', 'word') self.unit_type = unit_type - self._vocab_dict, self._id2token, self._vocab_list = self._load_vocabulary_from_file( - vocab_filepath) self.unk = UNK - self.unk_id = self._vocab_list.index(self.unk) - self.eos_id = self._vocab_list.index(EOS) + if vocab_filepath: + self._vocab_dict, self._id2token, self._vocab_list = self._load_vocabulary_from_file( + vocab_filepath) + self.unk_id = self._vocab_list.index(self.unk) + self.eos_id = self._vocab_list.index(EOS) if unit_type == 'spm': spm_model = spm_model_prefix + '.model' diff --git a/deepspeech/utils/utility.py b/deepspeech/utils/utility.py index 7db01dd19..3407e2e8f 100644 --- a/deepspeech/utils/utility.py +++ b/deepspeech/utils/utility.py @@ -13,6 +13,7 @@ # limitations under the License. """Contains common utility functions.""" +import os import math import distutils.util from typing import List @@ -20,7 +21,7 @@ from typing import List __all__ = ['print_arguments', 'add_arguments', "log_add"] -def print_arguments(args): +def print_arguments(args, info=None): """Print argparse's arguments. Usage: @@ -35,10 +36,14 @@ def print_arguments(args): :param args: Input argparse.Namespace for printing. :type args: argparse.Namespace """ - print("----------- Configuration Arguments -----------") + filename = "" + if info: + filename = info["__file__"] + filename = os.path.basename(filename) + print(f"----------- {filename} Configuration Arguments -----------") for arg, value in sorted(vars(args).items()): print("%s: %s" % (arg, value)) - print("------------------------------------------------") + print("-----------------------------------------------------------") def add_arguments(argname, type, default, help, argparser, **kwargs): diff --git a/examples/aishell/README.md b/examples/aishell/README.md index e109e1ae4..11e022d02 100644 --- a/examples/aishell/README.md +++ b/examples/aishell/README.md @@ -1 +1,2 @@ * s0 for deepspeech2 +* s1 for u2 diff --git a/examples/aishell/s0/local/data.sh b/examples/aishell/s0/local/data.sh index 6eeb3d8fc..85acb23ea 100644 --- a/examples/aishell/s0/local/data.sh +++ b/examples/aishell/s0/local/data.sh @@ -1,12 +1,11 @@ #! /usr/bin/env bash mkdir -p data - TARGET_DIR=${MAIN_ROOT}/examples/dataset mkdir -p ${TARGET_DIR} # download data, generate manifests -PYTHONPATH=.:$PYTHONPATH python3 ${TARGET_DIR}/aishell/aishell.py \ +python3 ${TARGET_DIR}/aishell/aishell.py \ --manifest_prefix="data/manifest" \ --target_dir="${TARGET_DIR}/aishell" @@ -16,11 +15,17 @@ if [ $? -ne 0 ]; then fi +for dataset in train dev test; do + mv data/manifest.${dataset} data/manifest.${dataset}.raw +done + + # build vocabulary python3 ${MAIN_ROOT}/utils/build_vocab.py \ +--unit_type="char" \ --count_threshold=0 \ --vocab_path="data/vocab.txt" \ ---manifest_paths "data/manifest.train" "data/manifest.dev" +--manifest_paths "data/manifest.train.raw" "data/manifest.dev.raw" if [ $? -ne 0 ]; then echo "Build vocabulary failed. Terminated." @@ -30,9 +35,11 @@ fi # compute mean and stddev for normalizer python3 ${MAIN_ROOT}/utils/compute_mean_std.py \ ---manifest_path="data/manifest.train" \ +--manifest_path="data/manifest.train.raw" \ --num_samples=2000 \ ---specgram_type="linear" \ +--specgram_type="fbank" \ +--feat_dim=80 \ +--delta_delta=false \ --output_path="data/mean_std.npz" if [ $? -ne 0 ]; then @@ -41,5 +48,21 @@ if [ $? -ne 0 ]; then fi +# format manifest with tokenids, vocab size +for dataset in train dev test; do + python3 ${MAIN_ROOT}/utils/format_data.py \ + --feat_type "raw" \ + --cmvn_path "data/mean_std.npz" \ + --unit_type "char" \ + --vocab_path="data/vocab.txt" \ + --manifest_path="data/manifest.${dataset}.raw" \ + --output_path="data/manifest.${dataset}" +done + +if [ $? -ne 0 ]; then + echo "Formt mnaifest failed. Terminated." + exit 1 +fi + echo "Aishell data preparation done." exit 0 diff --git a/examples/aishell/s0/local/download_model.sh b/examples/aishell/s0/local/download_model.sh deleted file mode 100644 index 2f9f40fb3..000000000 --- a/examples/aishell/s0/local/download_model.sh +++ /dev/null @@ -1,22 +0,0 @@ -#! /usr/bin/env bash - -. ${MAIN_ROOT}/utils/utility.sh - -DIR=data/pretrain -mkdir -p ${DIR} - -URL='https://deepspeech.bj.bcebos.com/mandarin_models/aishell_model_fluid.tar.gz' -MD5=2bf0cc8b6d5da2a2a787b5cc36a496b5 -TARGET=${DIR}/aishell_model_fluid.tar.gz - - -echo "Download Aishell model ..." -download $URL $MD5 $TARGET -if [ $? -ne 0 ]; then - echo "Fail to download Aishell model!" - exit 1 -fi -tar -zxvf $TARGET -C ${DIR} - - -exit 0 diff --git a/examples/aishell/s1/conf/augmentation.config b/examples/aishell/s1/conf/augmentation.config new file mode 100644 index 000000000..6c24da549 --- /dev/null +++ b/examples/aishell/s1/conf/augmentation.config @@ -0,0 +1,8 @@ +[ + { + "type": "shift", + "params": {"min_shift_ms": -5, + "max_shift_ms": 5}, + "prob": 1.0 + } +] diff --git a/examples/aishell/s1/local/data.sh b/examples/aishell/s1/local/data.sh new file mode 120000 index 000000000..7c4cf4564 --- /dev/null +++ b/examples/aishell/s1/local/data.sh @@ -0,0 +1 @@ +../../s0/local/data.sh \ No newline at end of file diff --git a/examples/aishell/s1/local/download_lm_ch.sh b/examples/aishell/s1/local/download_lm_ch.sh new file mode 120000 index 000000000..6541d91c5 --- /dev/null +++ b/examples/aishell/s1/local/download_lm_ch.sh @@ -0,0 +1 @@ +../../s0/local/download_lm_ch.sh \ No newline at end of file diff --git a/examples/aishell/s1/local/export.sh b/examples/aishell/s1/local/export.sh new file mode 100644 index 000000000..1b5533916 --- /dev/null +++ b/examples/aishell/s1/local/export.sh @@ -0,0 +1,20 @@ +#! /usr/bin/env bash + +if [ $# != 2 ];then + echo "usage: export ckpt_path jit_model_path" + exit -1 +fi + +python3 -u ${BIN_DIR}/export.py \ +--config conf/deepspeech2.yaml \ +--checkpoint_path ${1} \ +--export_path ${2} + + +if [ $? -ne 0 ]; then + echo "Failed in evaluation!" + exit 1 +fi + + +exit 0 diff --git a/examples/aishell/s1/local/test.sh b/examples/aishell/s1/local/test.sh new file mode 100644 index 000000000..0872ff21e --- /dev/null +++ b/examples/aishell/s1/local/test.sh @@ -0,0 +1,21 @@ +#! /usr/bin/env bash + +# download language model +bash local/download_lm_ch.sh +if [ $? -ne 0 ]; then + exit 1 +fi + +python3 -u ${BIN_DIR}/test.py \ +--device 'gpu' \ +--nproc 1 \ +--config conf/deepspeech2.yaml \ +--output ckpt + +if [ $? -ne 0 ]; then + echo "Failed in evaluation!" + exit 1 +fi + + +exit 0 diff --git a/examples/aishell/s1/local/train.sh b/examples/aishell/s1/local/train.sh new file mode 100644 index 000000000..c286566a8 --- /dev/null +++ b/examples/aishell/s1/local/train.sh @@ -0,0 +1,23 @@ +#! /usr/bin/env bash + +# train model +# if you wish to resume from an exists model, uncomment --init_from_pretrained_model +export FLAGS_sync_nccl_allreduce=0 + +ngpu=$(echo ${CUDA_VISIBLE_DEVICES} | python -c 'import sys; a = sys.stdin.read(); print(len(a.split(",")));') +echo "using $ngpu gpus..." + +python3 -u ${BIN_DIR}/train.py \ +--device 'gpu' \ +--nproc ${ngpu} \ +--config conf/deepspeech2.yaml \ +--output ckpt-${1} + + +if [ $? -ne 0 ]; then + echo "Failed in training!" + exit 1 +fi + + +exit 0 diff --git a/examples/aishell/s1/path.sh b/examples/aishell/s1/path.sh new file mode 100644 index 000000000..14e4fc3ec --- /dev/null +++ b/examples/aishell/s1/path.sh @@ -0,0 +1,14 @@ +export MAIN_ROOT=${PWD}/../../../ + +export PATH=${MAIN_ROOT}:${PWD}/tools:${PATH} +export LC_ALL=C + +# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C +export PYTHONIOENCODING=UTF-8 +export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH} + +export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib/ + + +MODEL=u2 +export BIN_DIR=${MAIN_ROOT}/deepspeech/exps/${MODEL}/bin diff --git a/examples/aishell/s1/run.sh b/examples/aishell/s1/run.sh new file mode 100644 index 000000000..8beb6bf0f --- /dev/null +++ b/examples/aishell/s1/run.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +source path.sh +# only demos + +# prepare data +bash ./local/data.sh + +# train model +CUDA_VISIBLE_DEVICES=0,1,2,3 bash ./local/train.sh + +# test model +CUDA_VISIBLE_DEVICES=0 bash ./local/test.sh + +# infer model +CUDA_VISIBLE_DEVICES=0 bash ./local/infer.sh ckpt/checkpoints/step-3284 + +# export model +bash ./local/export.sh ckpt/checkpoints/step-3284 jit.model \ No newline at end of file diff --git a/examples/tiny/s0/local/data.sh b/examples/tiny/s0/local/data.sh index 410bff395..c742aad54 100644 --- a/examples/tiny/s0/local/data.sh +++ b/examples/tiny/s0/local/data.sh @@ -5,7 +5,7 @@ TARGET_DIR=${MAIN_ROOT}/examples/dataset mkdir -p ${TARGET_DIR} # download data, generate manifests -PYTHONPATH=.:$PYTHONPATH python3 ${TARGET_DIR}/librispeech/librispeech.py \ +python3 ${TARGET_DIR}/librispeech/librispeech.py \ --manifest_prefix="data/manifest" \ --target_dir="${TARGET_DIR}/librispeech" \ --full_download="False" @@ -24,7 +24,7 @@ bpeprefix="data/bpe_${bpemode}_${nbpe}" # build vocabulary python3 ${MAIN_ROOT}/utils/build_vocab.py \ --unit_type "spm" \ ---vocab_size=${nbpe} \ +--spm_vocab_size=${nbpe} \ --spm_mode ${bpemode} \ --spm_model_prefix ${bpeprefix} \ --vocab_path="data/vocab.txt" \ diff --git a/utils/build_vocab.py b/utils/build_vocab.py index 6f496ef7f..dc0056561 100644 --- a/utils/build_vocab.py +++ b/utils/build_vocab.py @@ -45,9 +45,9 @@ add_arg('manifest_paths', str, nargs='+', required=True) # bpe -add_arg('vocab_size', int, 0, "Vocab size for spm.") +add_arg('spm_vocab_size', int, 0, "Vocab size for spm.") add_arg('spm_mode', str, 'unigram', "spm model type, e.g. unigram, spm, char, word. only need when `unit_type` is spm") -add_arg('spm_model_prefix', str, "spm_model_%(spm_mode)_%(count_threshold)", "spm model prefix, only need when `unit_type` is spm") +add_arg('spm_model_prefix', str, "", "spm_model_%(spm_mode)_%(count_threshold), spm model prefix, only need when `unit_type` is spm") # yapf: disable args = parser.parse_args() @@ -64,7 +64,7 @@ def dump_text_manifest(fileobj, manifest_path): fileobj.write(line_json['text'] + "\n") def main(): - print_arguments(args) + print_arguments(args, globals()) fout = open(args.vocab_path, 'w', encoding='utf-8') fout.write(BLANK + "\n") # 0 will be used for "blank" in CTC @@ -91,7 +91,7 @@ def main(): os.unlink(fp.name) # encode - text_feature = TextFeaturizer(args.unit_type, args.vocab_path, args.spm_model_prefix) + text_feature = TextFeaturizer(args.unit_type, "", args.spm_model_prefix) counter = Counter() for manifest_path in args.manifest_paths: diff --git a/utils/compute_mean_std.py b/utils/compute_mean_std.py index 29fadbada..eebbbb923 100644 --- a/utils/compute_mean_std.py +++ b/utils/compute_mean_std.py @@ -46,7 +46,7 @@ args = parser.parse_args() def main(): - print_arguments(args) + print_arguments(args, globals()) augmentation_pipeline = AugmentationPipeline('{}') audio_featurizer = AudioFeaturizer( diff --git a/utils/format_data.py b/utils/format_data.py index 6ba2e2b06..356a9c7d1 100644 --- a/utils/format_data.py +++ b/utils/format_data.py @@ -48,12 +48,12 @@ args = parser.parse_args() def main(): - print_arguments(args) + print_arguments(args, globals()) fout = open(args.output_path, 'w', encoding='utf-8') # get feat dim mean, std = load_cmvn(args.cmvn_path, filetype='npz') - feat_dim = mean.shape[0] + feat_dim = mean.shape[1] #(1, D) print(f"Feature dim: {feat_dim}") text_feature = TextFeaturizer(args.unit_type, args.vocab_path, args.spm_model_prefix)