diff --git a/.clang_format.hook b/.clang_format.hook deleted file mode 100755 index 4cbc972bb..000000000 --- a/.clang_format.hook +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/bin/env bash -set -e - -readonly VERSION="3.9" - -version=$(clang-format -version) - -if ! [[ $version == *"$VERSION"* ]]; then - echo "clang-format version check failed." - echo "a version contains '$VERSION' is needed, but get '$version'" - echo "you can install the right version, and make an soft-link to '\$PATH' env" - exit -1 -fi - -clang-format $@ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ede1c53a4..7aad026c5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -30,6 +30,12 @@ - id: clang-format name: clang-format description: Format files with ClangFormat - entry: bash .clang_format.hook -i + entry: bash .pre-commit-hooks/clang-format.hook -i language: system files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|cuh|proto)$ + - id: copyright_checker + name: copyright_checker + entry: python .pre-commit-hooks/copyright-check.hook + language: system + files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$ + exclude: (?=decoders/swig).*(\.cpp|\.h)$ \ No newline at end of file diff --git a/.pre-commit-hooks/clang-format.hook b/.pre-commit-hooks/clang-format.hook new file mode 100755 index 000000000..ceb4a7ea2 --- /dev/null +++ b/.pre-commit-hooks/clang-format.hook @@ -0,0 +1,15 @@ +#!/usr/bin/env bash +set -e + +readonly VERSION="3.9" + +version=$(clang-format -version) + +# if ! [[ $version == *"$VERSION"* ]]; then +# echo "clang-format version check failed." +# echo "a version contains '$VERSION' is needed, but get '$version'" +# echo "you can install the right version, and make an soft-link to '\$PATH' env" +# exit -1 +# fi + +clang-format $@ diff --git a/.pre-commit-hooks/copyright-check.hook b/.pre-commit-hooks/copyright-check.hook new file mode 100644 index 000000000..26044c29e --- /dev/null +++ b/.pre-commit-hooks/copyright-check.hook @@ -0,0 +1,131 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import io, re +import sys, os +import subprocess +import platform + +COPYRIGHT = ''' +Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+''' + +LANG_COMMENT_MARK = None + +NEW_LINE_MARK = None + +COPYRIGHT_HEADER = None + +if platform.system() == "Windows": + NEW_LINE_MARK = "\r\n" +else: + NEW_LINE_MARK = '\n' + COPYRIGHT_HEADER = COPYRIGHT.split(NEW_LINE_MARK)[1] + p = re.search(r'(\d{4})', COPYRIGHT_HEADER).group(0) + process = subprocess.Popen(["date", "+%Y"], stdout=subprocess.PIPE) + date, err = process.communicate() + date = date.decode("utf-8").rstrip("\n") + COPYRIGHT_HEADER = COPYRIGHT_HEADER.replace(p, date) + + +def generate_copyright(template, lang='C'): + if lang == 'Python': + LANG_COMMENT_MARK = '#' + else: + LANG_COMMENT_MARK = "//" + + lines = template.split(NEW_LINE_MARK) + BLANK = " " + ans = LANG_COMMENT_MARK + BLANK + COPYRIGHT_HEADER + NEW_LINE_MARK + for lino, line in enumerate(lines): + if lino == 0 or lino == 1 or lino == len(lines) - 1: continue + if len(line) == 0: + BLANK = "" + else: + BLANK = " " + ans += LANG_COMMENT_MARK + BLANK + line + NEW_LINE_MARK + + return ans + "\n" + + +def lang_type(filename): + if filename.endswith(".py"): + return "Python" + elif filename.endswith(".h"): + return "C" + elif filename.endswith(".c"): + return "C" + elif filename.endswith(".hpp"): + return "C" + elif filename.endswith(".cc"): + return "C" + elif filename.endswith(".cpp"): + return "C" + elif filename.endswith(".cu"): + return "C" + elif filename.endswith(".cuh"): + return "C" + elif filename.endswith(".go"): + return "C" + elif filename.endswith(".proto"): + return "C" + else: + print("Unsupported filetype %s" % filename) + exit(0) + + +PYTHON_ENCODE = re.compile("^[ \t\v]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)") + + +def main(argv=None): + parser = argparse.ArgumentParser( + description='Checker for copyright declaration.') + parser.add_argument('filenames', nargs='*', help='Filenames to check') + args = parser.parse_args(argv) + + retv = 0 + for filename in args.filenames: + fd = io.open(filename, encoding="utf-8") + first_line = fd.readline() + second_line = fd.readline() + if "COPYRIGHT (C)" in first_line.upper(): continue + if first_line.startswith("#!") or PYTHON_ENCODE.match( + second_line) != None or PYTHON_ENCODE.match(first_line) != None: + continue + original_contents = io.open(filename, encoding="utf-8").read() + new_contents = generate_copyright( + COPYRIGHT, lang_type(filename)) + original_contents + print('Auto Insert Copyright Header {}'.format(filename)) + retv = 1 + with io.open(filename, 'w') as output_file: + output_file.write(new_contents) + + return retv + + +if __name__ == '__main__': + exit(main()) \ No newline at end of file diff --git a/README.md b/README.md index 521351c37..7d99ef99b 100644 --- a/README.md +++ b/README.md @@ -6,21 +6,17 @@ ## Table of Contents - [Installation](#installation) -- [Running in Docker Container](#running-in-docker-container) - [Getting Started](#getting-started) - [Data Preparation](#data-preparation) - [Training a Model](#training-a-model) -- [Data Augmentation Pipeline](#data-augmentation-pipeline) - [Inference and Evaluation](#inference-and-evaluation) - [Hyper-parameters Tuning](#hyper-parameters-tuning) -- [Training for Mandarin Language](#training-for-mandarin-language) - [Trying Live Demo with Your Own Voice](#trying-live-demo-with-your-own-voice) -- [Released Models](#released-models) - [Experiments and Benchmarks](#experiments-and-benchmarks) +- [Released Models](#released-models) - [Questions and Help](#questions-and-help) - ## Installation To avoid the trouble of environment setup, [running in Docker
container](#running-in-docker-container) is highly recommended. Otherwise follow the guidelines below to install the dependencies manually. @@ -105,41 +101,16 @@ Let's take a tiny sampled subset of [LibriSpeech dataset](http://www.openslr.org ``` Notice that this is only a toy example with a tiny sampled subset of LibriSpeech. If you would like to try with the complete dataset (would take several days for training), please go to `examples/librispeech` instead. -- Prepare the data - - ```bash - sh run_data.sh - ``` - - `run_data.sh` will download dataset, generate manifests, collect normalizer's statistics and build vocabulary. Once the data preparation is done, you will find the data (only part of LibriSpeech) downloaded in `./dataset/librispeech` and the corresponding manifest files generated in `./data/tiny` as well as a mean stddev file and a vocabulary file. It has to be run for the very first time you run this dataset and is reusable for all further experiments. -- Train your own ASR model - - ```bash - sh run_train.sh - ``` - - `run_train.sh` will start a training job, with training logs printed to stdout and model checkpoint of every pass/epoch saved to `./checkpoints/tiny`. These checkpoints could be used for training resuming, inference, evaluation and deployment. -- Case inference with an existing model - - ```bash - sh run_infer.sh - ``` - - `run_infer.sh` will show us some speech-to-text decoding results for several (default: 10) samples with the trained model. The performance might not be good now as the current model is only trained with a toy subset of LibriSpeech. To see the results with a better model, you can download a well-trained (trained for several days, with the complete LibriSpeech) model and do the inference: - +- Source env + ```bash - sh run_infer_golden.sh + source path.sh ``` -- Evaluate an existing model + Set `MAIN_ROOT` as project dir. +- Main entrypoint ```bash - sh run_test.sh - ``` - - `run_test.sh` will evaluate the model with Word Error Rate (or Character Error Rate) measurement. Similarly, you can also download a well-trained model and test its performance: - - ```bash - sh run_test_golden.sh + bash run.sh ``` More detailed information are provided in the following sections. Wish you a happy journey with the *DeepSpeech2 on PaddlePaddle* ASR engine! @@ -158,7 +129,7 @@ More detailed information are provided in the following sections. Wish you a hap To use your custom data, you only need to generate such manifest files to summarize the dataset. Given such summarized manifests, training, inference and all other modules can be aware of where to access the audio files, as well as their meta data including the transcription labels. -For how to generate such manifest files, please refer to `data/librispeech/librispeech.py`, which will download data and generate manifest files for LibriSpeech dataset. +For how to generate such manifest files, please refer to `examples/librispeech/local/librispeech.py`, which will download data and generate manifest files for LibriSpeech dataset. 
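The manifest files referenced throughout this README are JSON-lines files: one JSON object per audio clip, carrying the clip's path, duration and transcription. Here is a minimal sketch of producing and consuming such a file; the field names (`audio_filepath`, `duration`, `text`) and the sample entry are assumptions for illustration and should be checked against what `examples/librispeech/local/librispeech.py` actually emits.

```python
import json

# Hypothetical manifest entry -- the key names are assumptions, not a
# guaranteed match for the exact schema the repo's data layer expects.
entries = [
    {"audio_filepath": "dataset/librispeech/dev-clean/1272-128104-0000.flac",
     "duration": 5.855,
     "text": "mister quilter is the apostle of the middle classes"},
]

# Writing: one JSON object per line.
with open("manifest.dev", "w", encoding="utf-8") as f:
    for entry in entries:
        f.write(json.dumps(entry) + "\n")

# Reading: every module that consumes a manifest just parses it line by line.
with open("manifest.dev", encoding="utf-8") as f:
    manifest = [json.loads(line) for line in f if line.strip()]

print(manifest[0]["audio_filepath"], manifest[0]["duration"])
```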
### Compute Mean & Stddev for Normalizer @@ -168,11 +139,11 @@ To perform z-score normalization (zero-mean, unit stddev) upon audio features, we have to estimate in advance the mean and standard deviation of audio features, with some training samples: ```bash python3 tools/compute_mean_std.py \ --num_samples 2000 \ --specgram_type linear \ ---manifest_path data/librispeech/manifest.train \ ---output_path data/librispeech/mean_std.npz +--manifest_path examples/librispeech/data/manifest.train \ +--output_path examples/librispeech/data/mean_std.npz ``` -It will compute the mean and standard deviatio of power spectrum feature with 2000 random sampled audio clips listed in `data/librispeech/manifest.train` and save the results to `data/librispeech/mean_std.npz` for further usage. +It will compute the mean and standard deviation of the power spectrum features with 2000 randomly sampled audio clips listed in `examples/librispeech/data/manifest.train` and save the results to `examples/librispeech/data/mean_std.npz` for further usage. ### Build Vocabulary A vocabulary of possible characters is required to convert the transcription into a list of token indices for training, and in decoding, to convert from a list of indices back to text again. Such a character-based vocabulary can be built with `tools/build_vocab.py`. ```bash python3 tools/build_vocab.py \ --count_threshold 0 \ ---vocab_path data/librispeech/eng_vocab.txt \ ---manifest_paths data/librispeech/manifest.train +--vocab_path examples/librispeech/data/eng_vocab.txt \ +--manifest_paths examples/librispeech/data/manifest.train ``` -It will write a vocabuary file `data/librispeeech/eng_vocab.txt` with all transcription text in `data/librispeech/manifest.train`, without vocabulary truncation (`--count_threshold 0`). +It will write a vocabulary file `examples/librispeech/data/eng_vocab.txt` with all transcription text in `examples/librispeech/data/manifest.train`, without vocabulary truncation (`--count_threshold 0`). ### More Help For more help on arguments: ```bash -python3 data/librispeech/librispeech.py --help +python3 examples/librispeech/local/librispeech.py --help python3 tools/compute_mean_std.py --help python3 tools/build_vocab.py --help ``` @@ -226,10 +197,10 @@ For more help on arguments: ```bash python3 train.py --help ``` -or refer to `example/librispeech/run_train.sh`. +or refer to `example/librispeech/local/run_train.sh`. -## Data Augmentation Pipeline +### Data Augmentation Pipeline Data augmentation has often been a highly effective technique to boost the deep learning performance. We augment our speech data by synthesizing new audios with small random perturbation (label-invariant transformation) added upon raw audios. You don't have to do the syntheses on your own, as it is already embedded into the data provider and is done on the fly, randomly for each epoch during training. @@ -265,6 +236,12 @@ For other configuration examples, please refer to `conf/augmenatation.config.exa Be careful when utilizing the data augmentation technique, as improper augmentation will do harm to the training, due to the enlarged train-test gap. + +### Training for Mandarin Language + +The key steps of training for the Mandarin language are the same as those for English, and we have also provided an example for Mandarin training with Aishell in ```examples/aishell/local```. As mentioned above, please execute ```sh run_data.sh```, ```sh run_train.sh```, ```sh run_test.sh``` and ```sh run_infer.sh``` to do data preparation, training, testing and inference respectively. We have also prepared a pre-trained model (downloaded by ./models/aishell/download_model.sh) for users to try with ```sh run_infer_golden.sh``` and ```sh run_test_golden.sh```.
Notice that, different from English LM, the Mandarin LM is character-based and please run ```tools/tune.py``` to find an optimal setting. + + ## Inference and Evaluation ### Prepare Language Model @@ -322,7 +299,7 @@ For more help on arguments: ``` python3 infer.py --help ``` -or refer to `example/librispeech/run_infer.sh`. +or refer to `example/librispeech/local/run_infer.sh`. ### Evaluate a Model @@ -347,7 +324,7 @@ For more help on arguments: ```bash python3 test.py --help ``` -or refer to `example/librispeech/run_test.sh`. +or refer to `example/librispeech/local/run_test.sh`. ## Hyper-parameters Tuning @@ -387,11 +364,8 @@ After tuning, you can reset $\alpha$ and $\beta$ in the inference and evaluation ```bash python3 tune.py --help ``` -or refer to `example/librispeech/run_tune.sh`. +or refer to `example/librispeech/local/run_tune.sh`. -## Training for Mandarin Language - -The key steps of training for Mandarin language are same to that of English language and we have also provided an example for Mandarin training with Aishell in ```examples/aishell```. As mentioned above, please execute ```sh run_data.sh```, ```sh run_train.sh```, ```sh run_test.sh``` and ```sh run_infer.sh``` to do data preparation, training, testing and inference correspondingly. We have also prepared a pre-trained model (downloaded by ./models/aishell/download_model.sh) for users to try with ```sh run_infer_golden.sh``` and ```sh run_test_golden.sh```. Notice that, different from English LM, the Mandarin LM is character-based and please run ```tools/tune.py``` to find an optimal setting. ## Trying Live Demo with Your Own Voice @@ -429,7 +403,7 @@ Now, in the client console, press the `whitespace` key, hold, and start speaking Notice that `deploy/demo_client.py` must be run on a machine with a microphone device, while `deploy/demo_server.py` could be run on one without any audio recording hardware, e.g. any remote server machine. Just be careful to set the `host_ip` and `host_port` argument with the actual accessible IP address and port, if the server and client are running with two separate machines. Nothing should be done if they are running on one single machine. -Please also refer to `examples/deploy_demo/run_english_demo_server.sh`, which will first download a pre-trained English model (trained with 3000 hours of internal speech data) and then start the demo server with the model. With running `examples/mandarin/run_demo_client.sh`, you can speak English to test it. If you would like to try some other models, just update `--model_path` argument in the script.   +Please also refer to `examples/deploy_demo/run_english_demo_server.sh`, which will first download a pre-trained English model (trained with 3000 hours of internal speech data) and then start the demo server with the model. With running `examples/deploy_demo/run_demo_client.sh`, you can speak English to test it. If you would like to try some other models, just update `--model_path` argument in the script.   
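Since the demo involves two processes that may live on different machines, a quick way to verify a `host_ip`/`host_port` pair before launching `deploy/demo_client.py` on the microphone machine is a plain TCP probe. The sketch below assumes illustrative address values, not defaults read from the scripts.

```python
import socket

# Assumed values for illustration -- use whatever you passed to demo_server.py.
HOST_IP = "192.168.1.10"
HOST_PORT = 8086

# Probe the server socket before starting the microphone client: a failed
# connect points at a wrong host_ip/host_port or a firewall in between.
try:
    with socket.create_connection((HOST_IP, HOST_PORT), timeout=3.0):
        print("demo server reachable at %s:%d" % (HOST_IP, HOST_PORT))
except OSError as err:
    print("cannot reach %s:%d -> %s" % (HOST_IP, HOST_PORT, err))
```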
For more help on arguments: @@ -438,24 +412,6 @@ python3 deploy/demo_server.py --help python3 deploy/demo_client.py --help ``` -## Released Models - -#### Speech Model Released - -Language | Model Name | Training Data | Hours of Speech -:-----------: | :------------: | :----------: | -------: -English | [LibriSpeech Model](https://deepspeech.bj.bcebos.com/eng_models/librispeech_model_fluid.tar.gz) | [LibriSpeech Dataset](http://www.openslr.org/12/) | 960 h -English | [BaiduEN8k Model](https://deepspeech.bj.bcebos.com/demo_models/baidu_en8k_model_fluid.tar.gz) | Baidu Internal English Dataset | 8628 h -Mandarin | [Aishell Model](https://deepspeech.bj.bcebos.com/mandarin_models/aishell_model_fluid.tar.gz) | [Aishell Dataset](http://www.openslr.org/33/) | 151 h -Mandarin | [BaiduCN1.2k Model](https://deepspeech.bj.bcebos.com/demo_models/baidu_cn1.2k_model_fluid.tar.gz) | Baidu Internal Mandarin Dataset | 1204 h - -#### Language Model Released - -Language Model | Training Data | Token-based | Size | Descriptions -:-------------:| :------------:| :-----: | -----: | :----------------- -[English LM](https://deepspeech.bj.bcebos.com/en_lm/common_crawl_00.prune01111.trie.klm) | [CommonCrawl(en.00)](http://web-language-models.s3-website-us-east-1.amazonaws.com/ngrams/en/deduped/en.00.deduped.xz) | Word-based | 8.3 GB | Pruned with 0 1 1 1 1;
About 1.85 billion n-grams;
'trie' binary with '-a 22 -q 8 -b 8' -[Mandarin LM Small](https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm) | Baidu Internal Corpus | Char-based | 2.8 GB | Pruned with 0 1 2 4 4;
About 0.13 billion n-grams;
'probing' binary with default settings -[Mandarin LM Large](https://deepspeech.bj.bcebos.com/zh_lm/zhidao_giga.klm) | Baidu Internal Corpus | Char-based | 70.4 GB | No Pruning;
About 3.7 billion n-grams;
'probing' binary with default settings ## Experiments and Benchmarks @@ -494,6 +450,27 @@ We compare the training time with 1, 2, 4, 8 Tesla V100 GPUs (with a subset of L `tools/profile.sh` provides such a profiling tool. + +## Released Models + +#### Speech Model Released + +Language | Model Name | Training Data | Hours of Speech +:-----------: | :------------: | :----------: | -------: +English | [LibriSpeech Model](https://deepspeech.bj.bcebos.com/eng_models/librispeech_model_fluid.tar.gz) | [LibriSpeech Dataset](http://www.openslr.org/12/) | 960 h +English | [BaiduEN8k Model](https://deepspeech.bj.bcebos.com/demo_models/baidu_en8k_model_fluid.tar.gz) | Baidu Internal English Dataset | 8628 h +Mandarin | [Aishell Model](https://deepspeech.bj.bcebos.com/mandarin_models/aishell_model_fluid.tar.gz) | [Aishell Dataset](http://www.openslr.org/33/) | 151 h +Mandarin | [BaiduCN1.2k Model](https://deepspeech.bj.bcebos.com/demo_models/baidu_cn1.2k_model_fluid.tar.gz) | Baidu Internal Mandarin Dataset | 1204 h + +#### Language Model Released + +Language Model | Training Data | Token-based | Size | Descriptions +:-------------:| :------------:| :-----: | -----: | :----------------- +[English LM](https://deepspeech.bj.bcebos.com/en_lm/common_crawl_00.prune01111.trie.klm) | [CommonCrawl(en.00)](http://web-language-models.s3-website-us-east-1.amazonaws.com/ngrams/en/deduped/en.00.deduped.xz) | Word-based | 8.3 GB | Pruned with 0 1 1 1 1;
About 1.85 billion n-grams;
'trie' binary with '-a 22 -q 8 -b 8' +[Mandarin LM Small](https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm) | Baidu Internal Corpus | Char-based | 2.8 GB | Pruned with 0 1 2 4 4;
About 0.13 billion n-grams;
'probing' binary with default settings +[Mandarin LM Large](https://deepspeech.bj.bcebos.com/zh_lm/zhidao_giga.klm) | Baidu Internal Corpus | Char-based | 70.4 GB | No Pruning;
About 3.7 billion n-grams;
'probing' binary with default settings + + ## Questions and Help You are welcome to submit questions and bug reports in [Github Issues](https://github.com/PaddlePaddle/DeepSpeech/issues). You are also welcome to contribute to this project. diff --git a/README_cn.md b/README_cn.md index 176f0cd77..94825cb96 100644 --- a/README_cn.md +++ b/README_cn.md @@ -7,17 +7,14 @@ ## 目录 - [安装](#安装) -- [在 Docker 容器上运行](#在Docker容器上运行) - [开始](#开始) - [数据准备](#数据准备) - [训练模型](#训练模型) -- [数据增强流水线](#数据增强流水线) - [推断和评价](#推断和评价) - [超参数调整](#超参数调整) -- [训练汉语语言](#训练汉语语言) - [用自己的声音尝试现场演示](#用自己的声音尝试现场演示) -- [发布模型](#发布模型) - [试验和基准](#试验和基准) +- [发布模型](#发布模型) - [问题和帮助](#问题和帮助) ## 安装 @@ -102,42 +99,16 @@ python3 -m pip install paddlepaddle-gpu==1.8.0.post107 cd examples/tiny ``` - 注意这仅仅是 LibriSpeech 一个小数据集的例子。如果你想尝试完整的数据集(可能需要花好几天来训练模型),请使用这个路径`examples/librispeech`。 -- 准备数据 - - ```bash - sh run_data.sh - ``` - - 运行`run_data.sh`脚本将会下载数据集,产出 manifests 文件,收集一些归一化需要的统计信息并建立词表。当数据准备完成之后,下载完的数据(仅有 LibriSpeech 一部分)在`dataset/librispeech`中;其对应的 manifest 文件,均值标准差和词表文件在`./data/tiny`中。在第一次执行的时候一定要执行这个脚本,在接下来所有的实验中我们都会用到这个数据集。 -- 训练你自己的 ASR 模型 - - ```bash - sh run_train.sh - ``` - - `run_train.sh`将会启动训练任务,训练日志会打印到终端,并且模型每个 epoch 的 checkpoint 都会保存到`./checkpoints/tiny`目录中。这些 checkpoint 可以用来恢复训练,推断,评价和部署。 -- 用已有的模型进行案例推断 - - ```bash - sh run_infer.sh - ``` - - `run_infer.sh`将会利用训练好的模型展现一些(默认 10 个)样本语音到文本的解码结果。由于当前模型只使用了 LibriSpeech 一部分数据集训练,因此性能可能不会太好。为了看到更好模型上的表现,你可以下载一个已训练好的模型(用完整的 LibriSpeech 训练了好几天)来做推断。 - - ```bash - sh run_infer_golden.sh - ``` -- 评价一个已经存在的模型 + 注意这仅仅是 LibriSpeech 一个小数据集的例子。如果你想尝试完整的数据集(可能需要花好几天来训练模型),请使用这个路径`examples/librispeech`。 +- 设置环境变量 ```bash - sh run_test.sh + source path.sh ``` - - `run_test.sh`能够利用误字率(或字符错误率)来评价模型。类似的,你可以下载一个完全训练好的模型来测试它的性能: +- 入口脚本 ```bash - sh run_test_golden.sh + bash run.sh ``` 更多细节会在接下来的章节中阐述。祝你在*DeepSpeech2*ASR引擎学习中过得愉快! 
@@ -156,7 +127,7 @@ python3 -m pip install paddlepaddle-gpu==1.8.0.post107 如果你要使用自定义数据,你只需要按照以上格式生成自己的 manifest 文件即可。给定 manifest 文件,训练、推断以及其它所有模块都能够访问到音频数据以及对应的时长和标签数据。 -关于如何生成 manifest 文件,请参考`data/librispeech/librispeech.py`。该脚本将会下载 LibriSpeech 数据集并生成 manifest 文件。 +关于如何生成 manifest 文件,请参考`examples/librispeech/local/librispeech.py`。该脚本将会下载 LibriSpeech 数据集并生成 manifest 文件。 ### 计算均值和标准差用于归一化 @@ -166,11 +137,11 @@ python3 -m pip install paddlepaddle-gpu==1.8.0.post107 python3 tools/compute_mean_std.py \ --num_samples 2000 \ --specgram_type linear \ ---manifest_path data/librispeech/manifest.train \ ---output_path data/librispeech/mean_std.npz +--manifest_path examples/librispeech/data/manifest.train \ +--output_path examples/librispeech/data/mean_std.npz ``` -以上这段代码会计算在`data/librispeech/manifest.train`路径中,2000 个随机采样的语音频谱特征的均值和标准差,并将结果保存在`data/librispeech/mean_std.npz`中,方便以后使用。 +以上这段代码会计算在`examples/librispeech/data/manifest.train`路径中,2000 个随机采样的语音频谱特征的均值和标准差,并将结果保存在`examples/librispeech/data/mean_std.npz`中,方便以后使用。 ### 建立词表 @@ -179,22 +150,24 @@ python3 tools/compute_mean_std.py \ ```bash python3 tools/build_vocab.py \ --count_threshold 0 \ ---vocab_path data/librispeech/eng_vocab.txt \ ---manifest_paths data/librispeech/manifest.train +--vocab_path examples/librispeech/data/eng_vocab.txt \ +--manifest_paths examples/librispeech/data/manifest.train ``` -它将`data/librispeech/manifest.train`目录中的所有录音文本写入词表文件`data/librispeeech/eng_vocab.txt`,并且没有词汇截断(`--count_threshold 0`)。 +它将`examples/librispeech/data/manifest.train`目录中的所有录音文本写入词表文件`examples/librispeeech/data/eng_vocab.txt`,并且没有词汇截断(`--count_threshold 0`)。 ### 更多帮助 获得更多帮助: ```bash -python3 data/librispeech/librispeech.py --help +python3 examples/librispeech/local/librispeech.py --help python3 tools/compute_mean_std.py --help python3 tools/build_vocab.py --help ``` + + ## 训练模型 `train.py`是训练模块的主要调用者。使用示例如下。 @@ -224,10 +197,10 @@ python3 tools/build_vocab.py --help ```bash python3 train.py --help ``` -或参考 `example/librispeech/run_train.sh`. +或参考 `example/librispeech/local/run_train.sh`. -## 数据增强流水线 +### 数据增强流水线 数据增强是用来提升深度学习性能的非常有效的技术。我们通过在原始音频中添加小的随机扰动(标签不变转换)获得新音频来增强我们的语音数据。你不必自己合成,因为数据增强已经嵌入到数据生成器中并且能够即时完成,在训练模型的每个epoch中随机合成音频。 @@ -263,6 +236,12 @@ python3 train.py --help 使用数据增强技术时要小心,由于扩大了训练和测试集的差异,不恰当的增强会对训练模型不利,导致训练和预测的差距增大。 +### 训练普通话语言 + +普通话语言训练与英语训练的关键步骤相同,我们提供了一个使用 Aishell 进行普通话训练的例子```examples/aishell```。如上所述,请执行```sh run_data.sh```, ```sh run_train.sh```, ```sh run_test.sh```和```sh run_infer.sh```做相应的数据准备,训练,测试和推断。我们还准备了一个预训练过的模型(执行./models/aishell/download_model.sh下载)供用户使用```run_infer_golden.sh```和```run_test_golden.sh```来。请注意,与英语语言模型不同,普通话语言模型是基于汉字的,请运行```tools/tune.py```来查找最佳设置。 + + + ## 推断和评价 ### 准备语言模型 @@ -321,7 +300,7 @@ bash download_lm_ch.sh ``` python3 infer.py --help ``` -或参考`example/librispeech/run_infer.sh`. +或参考`example/librispeech/local/run_infer.sh`. ### 评估模型 @@ -346,7 +325,9 @@ python3 infer.py --help ```bash python3 test.py --help ``` -或参考`example/librispeech/run_test.sh`. +或参考`example/librispeech/local/run_test.sh`. + + ## 超参数调整 @@ -386,11 +367,8 @@ python3 test.py --help ```bash python3 tune.py --help ``` -或参考`example/librispeech/run_tune.sh`. - -## 训练普通话语言 +或参考`example/librispeech/local/run_tune.sh`. 
-普通话语言训练与英语训练的关键步骤相同,我们提供了一个使用 Aishell 进行普通话训练的例子```examples/aishell```。如上所述,请执行```sh run_data.sh```, ```sh run_train.sh```, ```sh run_test.sh```和```sh run_infer.sh```做相应的数据准备,训练,测试和推断。我们还准备了一个预训练过的模型(执行./models/aishell/download_model.sh下载)供用户使用```run_infer_golden.sh```和```run_test_golden.sh```来。请注意,与英语语言模型不同,普通话语言模型是基于汉字的,请运行```tools/tune.py```来查找最佳设置。 ## 用自己的声音尝试现场演示 @@ -428,7 +406,7 @@ python3 -u deploy/demo_client.py \ 请注意,`deploy/demo_client.py`必须在带麦克风设备的机器上运行,而`deploy/demo_server.py`可以在没有任何录音硬件的情况下运行,例如任何远程服务器机器。如果服务器和客户端使用两台独立的机器运行,只需要注意将`host_ip`和`host_port`参数设置为实际可访问的IP地址和端口。如果它们在单台机器上运行,则不用作任何处理。 -请参考`examples/deploy_demo/run_english_demo_server.sh`,它将首先下载一个预先训练过的英语模型(用3000小时的内部语音数据训练),然后用模型启动演示服务器。通过运行`examples/mandarin/run_demo_client.sh`,你可以说英语来测试它。如果您想尝试其他模型,只需更新脚本中的`--model_path`参数即可。 +请参考`examples/deploy_demo/run_english_demo_server.sh`,它将首先下载一个预先训练过的英语模型(用3000小时的内部语音数据训练),然后用模型启动演示服务器。通过运行`examples/deploy_demo/run_demo_client.sh`,你可以说英语来测试它。如果您想尝试其他模型,只需更新脚本中的`--model_path`参数即可。 获得更多帮助: @@ -437,24 +415,6 @@ python3 deploy/demo_server.py --help python3 deploy/demo_client.py --help ``` -## 发布模型 - -#### 语音模型发布 - -语种 | 模型名 | 训练数据 | 语音时长 -:-----------: | :------------: | :----------: | -------: -English | [LibriSpeech Model](https://deepspeech.bj.bcebos.com/eng_models/librispeech_model_fluid.tar.gz) | [LibriSpeech Dataset](http://www.openslr.org/12/) | 960 h -English | [BaiduEN8k Model](https://deepspeech.bj.bcebos.com/demo_models/baidu_en8k_model_fluid.tar.gz) | Baidu Internal English Dataset | 8628 h -Mandarin | [Aishell Model](https://deepspeech.bj.bcebos.com/mandarin_models/aishell_model_fluid.tar.gz) | [Aishell Dataset](http://www.openslr.org/33/) | 151 h -Mandarin | [BaiduCN1.2k Model](https://deepspeech.bj.bcebos.com/demo_models/baidu_cn1.2k_model_fluid.tar.gz) | Baidu Internal Mandarin Dataset | 1204 h - -#### 语言模型发布 - -语言模型 | 训练数据 | 基于的字符 | 大小 | 描述 -:-------------:| :------------:| :-----: | -----: | :----------------- -[English LM](https://deepspeech.bj.bcebos.com/en_lm/common_crawl_00.prune01111.trie.klm) | [CommonCrawl(en.00)](http://web-language-models.s3-website-us-east-1.amazonaws.com/ngrams/en/deduped/en.00.deduped.xz) | Word-based | 8.3 GB | Pruned with 0 1 1 1 1;
About 1.85 billion n-grams;
'trie' binary with '-a 22 -q 8 -b 8' -[Mandarin LM Small](https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm) | Baidu Internal Corpus | Char-based | 2.8 GB | Pruned with 0 1 2 4 4;
About 0.13 billion n-grams;
'probing' binary with default settings -[Mandarin LM Large](https://deepspeech.bj.bcebos.com/zh_lm/zhidao_giga.klm) | Baidu Internal Corpus | Char-based | 70.4 GB | No Pruning;
About 3.7 billion n-grams;
'probing' binary with default settings ## 实验和baseline @@ -494,6 +454,27 @@ Baidu Internal Testset | 12.64 `tools/profile.sh`提供了上述分析工具. + +## 发布模型 + +#### 语音模型发布 + +语种 | 模型名 | 训练数据 | 语音时长 +:-----------: | :------------: | :----------: | -------: +English | [LibriSpeech Model](https://deepspeech.bj.bcebos.com/eng_models/librispeech_model_fluid.tar.gz) | [LibriSpeech Dataset](http://www.openslr.org/12/) | 960 h +English | [BaiduEN8k Model](https://deepspeech.bj.bcebos.com/demo_models/baidu_en8k_model_fluid.tar.gz) | Baidu Internal English Dataset | 8628 h +Mandarin | [Aishell Model](https://deepspeech.bj.bcebos.com/mandarin_models/aishell_model_fluid.tar.gz) | [Aishell Dataset](http://www.openslr.org/33/) | 151 h +Mandarin | [BaiduCN1.2k Model](https://deepspeech.bj.bcebos.com/demo_models/baidu_cn1.2k_model_fluid.tar.gz) | Baidu Internal Mandarin Dataset | 1204 h + +#### 语言模型发布 + +语言模型 | 训练数据 | 基于的字符 | 大小 | 描述 +:-------------:| :------------:| :-----: | -----: | :----------------- +[English LM](https://deepspeech.bj.bcebos.com/en_lm/common_crawl_00.prune01111.trie.klm) | [CommonCrawl(en.00)](http://web-language-models.s3-website-us-east-1.amazonaws.com/ngrams/en/deduped/en.00.deduped.xz) | Word-based | 8.3 GB | Pruned with 0 1 1 1 1;
About 1.85 billion n-grams;
'trie' binary with '-a 22 -q 8 -b 8' +[Mandarin LM Small](https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm) | Baidu Internal Corpus | Char-based | 2.8 GB | Pruned with 0 1 2 4 4;
About 0.13 billion n-grams;
'probing' binary with default settings +[Mandarin LM Large](https://deepspeech.bj.bcebos.com/zh_lm/zhidao_giga.klm) | Baidu Internal Corpus | Char-based | 70.4 GB | No Pruning;
About 3.7 billion n-grams;
'probing' binary with default settings + + ## 问题和帮助 欢迎您在[Github问题](https://github.com/PaddlePaddle/models/issues)中提交问题和bug。也欢迎您为这个项目做出贡献。 diff --git a/data/noise/chime3_background.py b/data/noise/chime3_background.py index 30a2e14e7..8db09204e 100644 --- a/data/noise/chime3_background.py +++ b/data/noise/chime3_background.py @@ -1,3 +1,16 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Prepare CHiME3 background data. Download, unpack and create manifest files. diff --git a/data/voxforge/voxforge.py b/data/voxforge/voxforge.py index 8478166fb..3fb0ded88 100644 --- a/data/voxforge/voxforge.py +++ b/data/voxforge/voxforge.py @@ -1,3 +1,16 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Prepare VoxForge dataset Download, unpack and create manifest files. diff --git a/data_utils/__init__.py b/data_utils/__init__.py index e69de29bb..185a92b8d 100644 --- a/data_utils/__init__.py +++ b/data_utils/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/data_utils/audio.py b/data_utils/audio.py index d7b88952d..2ba7019a2 100644 --- a/data_utils/audio.py +++ b/data_utils/audio.py @@ -1,3 +1,16 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
"""Contains the audio segment class.""" import numpy as np diff --git a/data_utils/augmentor/__init__.py b/data_utils/augmentor/__init__.py index e69de29bb..185a92b8d 100644 --- a/data_utils/augmentor/__init__.py +++ b/data_utils/augmentor/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/data_utils/augmentor/augmentation.py b/data_utils/augmentor/augmentation.py index 37d6bc1dd..349cdc564 100644 --- a/data_utils/augmentor/augmentation.py +++ b/data_utils/augmentor/augmentation.py @@ -1,3 +1,16 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Contains the data augmentation pipeline.""" import json diff --git a/data_utils/augmentor/base.py b/data_utils/augmentor/base.py index adeda6465..5b80be2fe 100644 --- a/data_utils/augmentor/base.py +++ b/data_utils/augmentor/base.py @@ -1,3 +1,16 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Contains the abstract base class for augmentation models.""" from abc import ABCMeta, abstractmethod diff --git a/data_utils/augmentor/impulse_response.py b/data_utils/augmentor/impulse_response.py index 937e578bd..839c6a809 100644 --- a/data_utils/augmentor/impulse_response.py +++ b/data_utils/augmentor/impulse_response.py @@ -1,3 +1,16 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. """Contains the impulse response augmentation model.""" from data_utils.augmentor.base import AugmentorBase diff --git a/data_utils/augmentor/noise_perturb.py b/data_utils/augmentor/noise_perturb.py index 952ca3fb9..954d1b419 100644 --- a/data_utils/augmentor/noise_perturb.py +++ b/data_utils/augmentor/noise_perturb.py @@ -1,3 +1,16 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Contains the noise perturb augmentation model.""" from data_utils.augmentor.base import AugmentorBase diff --git a/data_utils/augmentor/online_bayesian_normalization.py b/data_utils/augmentor/online_bayesian_normalization.py index deba9a77a..f5c7d99fd 100644 --- a/data_utils/augmentor/online_bayesian_normalization.py +++ b/data_utils/augmentor/online_bayesian_normalization.py @@ -1,3 +1,16 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Contain the online bayesian normalization augmentation model.""" from data_utils.augmentor.base import AugmentorBase diff --git a/data_utils/augmentor/resample.py b/data_utils/augmentor/resample.py index e26d06b46..3732e09cd 100644 --- a/data_utils/augmentor/resample.py +++ b/data_utils/augmentor/resample.py @@ -1,3 +1,16 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Contain the resample augmentation model.""" from data_utils.augmentor.base import AugmentorBase diff --git a/data_utils/augmentor/shift_perturb.py b/data_utils/augmentor/shift_perturb.py index a9b732161..8b8e60362 100644 --- a/data_utils/augmentor/shift_perturb.py +++ b/data_utils/augmentor/shift_perturb.py @@ -1,3 +1,16 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Contains the volume perturb augmentation model.""" from data_utils.augmentor.base import AugmentorBase diff --git a/data_utils/augmentor/speed_perturb.py b/data_utils/augmentor/speed_perturb.py index 78dfd0c65..7b28f7ec6 100644 --- a/data_utils/augmentor/speed_perturb.py +++ b/data_utils/augmentor/speed_perturb.py @@ -1,3 +1,16 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Contain the speech perturbation augmentation model.""" from data_utils.augmentor.base import AugmentorBase diff --git a/data_utils/augmentor/volume_perturb.py b/data_utils/augmentor/volume_perturb.py index b1d8c190b..b98c7a3b4 100644 --- a/data_utils/augmentor/volume_perturb.py +++ b/data_utils/augmentor/volume_perturb.py @@ -1,3 +1,16 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Contains the volume perturb augmentation model.""" from data_utils.augmentor.base import AugmentorBase diff --git a/data_utils/data.py b/data_utils/data.py index ed65a0947..125768898 100644 --- a/data_utils/data.py +++ b/data_utils/data.py @@ -1,3 +1,16 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Contains data generator for orgnaizing various audio data preprocessing pipeline and offering data reader interface of PaddlePaddle requirements. 
""" diff --git a/data_utils/featurizer/__init__.py b/data_utils/featurizer/__init__.py index e69de29bb..185a92b8d 100644 --- a/data_utils/featurizer/__init__.py +++ b/data_utils/featurizer/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/data_utils/featurizer/audio_featurizer.py b/data_utils/featurizer/audio_featurizer.py index c5f20a16b..0afd19870 100644 --- a/data_utils/featurizer/audio_featurizer.py +++ b/data_utils/featurizer/audio_featurizer.py @@ -1,3 +1,16 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Contains the audio featurizer class.""" import numpy as np diff --git a/data_utils/featurizer/speech_featurizer.py b/data_utils/featurizer/speech_featurizer.py index a4484ae44..2e1424fa4 100644 --- a/data_utils/featurizer/speech_featurizer.py +++ b/data_utils/featurizer/speech_featurizer.py @@ -1,3 +1,16 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Contains the speech featurizer class.""" from data_utils.featurizer.audio_featurizer import AudioFeaturizer diff --git a/data_utils/featurizer/text_featurizer.py b/data_utils/featurizer/text_featurizer.py index 0c48b38e0..70aa10ead 100644 --- a/data_utils/featurizer/text_featurizer.py +++ b/data_utils/featurizer/text_featurizer.py @@ -1,3 +1,16 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. """Contains the text featurizer class.""" import os diff --git a/data_utils/normalizer.py b/data_utils/normalizer.py index 378714cc0..83a008f10 100644 --- a/data_utils/normalizer.py +++ b/data_utils/normalizer.py @@ -1,3 +1,16 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Contains feature normalizers.""" import numpy as np diff --git a/data_utils/speech.py b/data_utils/speech.py index 3ea6ec296..01c1787a4 100644 --- a/data_utils/speech.py +++ b/data_utils/speech.py @@ -1,3 +1,16 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Contains the speech segment class.""" import numpy as np diff --git a/data_utils/utility.py b/data_utils/utility.py index 2377bc221..6cc1b2713 100644 --- a/data_utils/utility.py +++ b/data_utils/utility.py @@ -1,3 +1,16 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Contains data helper functions.""" import json diff --git a/decoders/__init__.py b/decoders/__init__.py index e69de29bb..185a92b8d 100644 --- a/decoders/__init__.py +++ b/decoders/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
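The decoder diffs that follow (e.g. `decoders/decoders_deprecated.py`, described as "various CTC decoders" and importing `itertools.groupby`) center on CTC decoding. As background, here is a minimal best-path (greedy) CTC decoder: argmax per frame, collapse repeats, drop blanks. The toy vocabulary and blank index are hypothetical, and the repo's real decoders (including beam search with an external LM scorer) are more involved.

```python
from itertools import groupby  # the same helper the repo's decoder module imports


def ctc_greedy_decode(probs_seq, vocabulary, blank_index=0):
    """Best-path CTC decoding: argmax per frame, collapse consecutive
    duplicates, then remove blank tokens."""
    best_path = [max(range(len(frame)), key=frame.__getitem__) for frame in probs_seq]
    collapsed = [idx for idx, _ in groupby(best_path)]
    return "".join(vocabulary[idx] for idx in collapsed if idx != blank_index)


# Toy posteriors over a 4-symbol table where index 0 is the CTC blank.
vocab = ["<blank>", "a", "b", "c"]
frames = [
    [0.10, 0.70, 0.10, 0.10],  # argmax -> 'a'
    [0.10, 0.60, 0.20, 0.10],  # 'a' again -> collapsed with the previous frame
    [0.80, 0.10, 0.05, 0.05],  # blank -> dropped
    [0.10, 0.10, 0.10, 0.70],  # 'c'
]
print(ctc_greedy_decode(frames, vocab))  # prints "ac"
```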
diff --git a/decoders/decoders_deprecated.py b/decoders/decoders_deprecated.py index 0dad44c4a..99e14e49d 100644 --- a/decoders/decoders_deprecated.py +++ b/decoders/decoders_deprecated.py @@ -1,3 +1,16 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Contains various CTC decoders.""" from itertools import groupby diff --git a/decoders/scorer_deprecated.py b/decoders/scorer_deprecated.py index 266df559b..919b02946 100644 --- a/decoders/scorer_deprecated.py +++ b/decoders/scorer_deprecated.py @@ -1,3 +1,16 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """External Scorer for Beam Search Decoder.""" import os diff --git a/decoders/swig/__init__.py b/decoders/swig/__init__.py index e69de29bb..185a92b8d 100644 --- a/decoders/swig/__init__.py +++ b/decoders/swig/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/decoders/swig/_init_paths.py b/decoders/swig/_init_paths.py index 3bb2fd197..c4b28c643 100644 --- a/decoders/swig/_init_paths.py +++ b/decoders/swig/_init_paths.py @@ -1,3 +1,16 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
"""Set up paths for DS2""" import os.path diff --git a/decoders/swig/setup.py b/decoders/swig/setup.py index c5a7c4ca6..0fcb24b50 100644 --- a/decoders/swig/setup.py +++ b/decoders/swig/setup.py @@ -1,3 +1,16 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Script to build and install decoder package.""" from setuptools import setup, Extension, distutils diff --git a/decoders/swig_wrapper.py b/decoders/swig_wrapper.py index b32893b92..0a0579ad0 100644 --- a/decoders/swig_wrapper.py +++ b/decoders/swig_wrapper.py @@ -1,3 +1,16 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Wrapper for various CTC decoders in SWIG.""" import swig_decoders diff --git a/decoders/tests/test_decoders.py b/decoders/tests/test_decoders.py index 878417432..9c4b1c8eb 100644 --- a/decoders/tests/test_decoders.py +++ b/decoders/tests/test_decoders.py @@ -1,3 +1,16 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Test decoders.""" import unittest diff --git a/deploy/_init_paths.py b/deploy/_init_paths.py index 3bb2fd197..c4b28c643 100644 --- a/deploy/_init_paths.py +++ b/deploy/_init_paths.py @@ -1,3 +1,16 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
"""Set up paths for DS2""" import os.path diff --git a/deploy/demo_client.py b/deploy/demo_client.py index 45a9d319a..b4aa50e8e 100644 --- a/deploy/demo_client.py +++ b/deploy/demo_client.py @@ -1,3 +1,16 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Client-end for the ASR demo.""" import keyboard import struct diff --git a/deploy/demo_server.py b/deploy/demo_server.py index 88c2e9ef9..bfc48c9f1 100644 --- a/deploy/demo_server.py +++ b/deploy/demo_server.py @@ -1,3 +1,16 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Server-end for the ASR demo.""" import os import time diff --git a/data/aishell/aishell.py b/examples/aishell/local/aishell.py similarity index 86% rename from data/aishell/aishell.py rename to examples/aishell/local/aishell.py index 6290712f2..ba59b744d 100644 --- a/data/aishell/aishell.py +++ b/examples/aishell/local/aishell.py @@ -1,3 +1,16 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Prepare Aishell mandarin dataset Download, unpack and create manifest files. diff --git a/examples/aishell/run_data.sh b/examples/aishell/local/run_data.sh similarity index 50% rename from examples/aishell/run_data.sh rename to examples/aishell/local/run_data.sh index fc002fcd2..b874b2df8 100644 --- a/examples/aishell/run_data.sh +++ b/examples/aishell/local/run_data.sh @@ -1,11 +1,11 @@ #! /usr/bin/env bash -cd ../.. > /dev/null +mkdir -p data # download data, generate manifests -PYTHONPATH=.:$PYTHONPATH python3 data/aishell/aishell.py \ ---manifest_prefix='data/aishell/manifest' \ ---target_dir='./dataset/aishell' +PYTHONPATH=.:$PYTHONPATH python3 local/aishell.py \ +--manifest_prefix="data/manifest" \ +--target_dir="${MAIN_ROOT}/dataset/aishell" if [ $? -ne 0 ]; then echo "Prepare Aishell failed. Terminated." 
@@ -14,10 +14,10 @@ fi # build vocabulary -python3 tools/build_vocab.py \ +python3 ${MAIN_ROOT}/tools/build_vocab.py \ --count_threshold=0 \ ---vocab_path='data/aishell/vocab.txt' \ ---manifest_paths 'data/aishell/manifest.train' 'data/aishell/manifest.dev' +--vocab_path="data/vocab.txt" \ +--manifest_paths "data/manifest.train" "data/manifest.dev" if [ $? -ne 0 ]; then echo "Build vocabulary failed. Terminated." @@ -26,11 +26,11 @@ fi # compute mean and stddev for normalizer -python3 tools/compute_mean_std.py \ ---manifest_path='data/aishell/manifest.train' \ +python3 ${MAIN_ROOT}/tools/compute_mean_std.py \ +--manifest_path="data/manifest.train" \ --num_samples=2000 \ ---specgram_type='linear' \ ---output_path='data/aishell/mean_std.npz' +--specgram_type="linear" \ +--output_path="data/mean_std.npz" if [ $? -ne 0 ]; then echo "Compute mean and stddev failed. Terminated." diff --git a/examples/aishell/run_infer.sh b/examples/aishell/local/run_infer.sh similarity index 55% rename from examples/aishell/run_infer.sh rename to examples/aishell/local/run_infer.sh index ef382e310..90be581be 100644 --- a/examples/aishell/run_infer.sh +++ b/examples/aishell/local/run_infer.sh @@ -1,9 +1,8 @@ #! /usr/bin/env bash -cd ../.. > /dev/null # download language model -cd models/lm > /dev/null +cd ${MAIN_ROOT}/models/lm > /dev/null bash download_lm_ch.sh if [ $? -ne 0 ]; then exit 1 @@ -13,7 +12,7 @@ cd - > /dev/null # infer CUDA_VISIBLE_DEVICES=0 \ -python3 -u infer.py \ +python3 -u ${MAIN_ROOT}/infer.py \ --num_samples=10 \ --beam_size=300 \ --num_proc_bsearch=8 \ @@ -27,14 +26,14 @@ python3 -u infer.py \ --use_gru=True \ --use_gpu=True \ --share_rnn_weights=False \ ---infer_manifest='data/aishell/manifest.test' \ ---mean_std_path='data/aishell/mean_std.npz' \ ---vocab_path='data/aishell/vocab.txt' \ ---model_path='checkpoints/aishell/step_final' \ ---lang_model_path='models/lm/zh_giga.no_cna_cmn.prune01244.klm' \ ---decoding_method='ctc_beam_search' \ ---error_rate_type='cer' \ ---specgram_type='linear' +--infer_manifest="data/manifest.test" \ +--mean_std_path="data/mean_std.npz" \ +--vocab_path="data/vocab.txt" \ +--model_path="checkpoints/step_final" \ +--lang_model_path="${MAIN_ROOT}/models/lm/zh_giga.no_cna_cmn.prune01244.klm" \ +--decoding_method="ctc_beam_search" \ +--error_rate_type="cer" \ +--specgram_type="linear" if [ $? -ne 0 ]; then echo "Failed in inference!" diff --git a/examples/aishell/run_infer_golden.sh b/examples/aishell/local/run_infer_golden.sh similarity index 55% rename from examples/aishell/run_infer_golden.sh rename to examples/aishell/local/run_infer_golden.sh index dabdc0c68..296c0d5b4 100644 --- a/examples/aishell/run_infer_golden.sh +++ b/examples/aishell/local/run_infer_golden.sh @@ -1,9 +1,7 @@ #! /usr/bin/env bash -cd ../.. > /dev/null - # download language model -cd models/lm > /dev/null +cd ${MAIN_ROOT}/models/lm > /dev/null bash download_lm_ch.sh if [ $? -ne 0 ]; then exit 1 @@ -12,7 +10,7 @@ cd - > /dev/null # download well-trained model -cd models/aishell > /dev/null +cd ${MAIN_ROOT}/models/aishell > /dev/null bash download_model.sh if [ $? 
-ne 0 ]; then exit 1 @@ -22,7 +20,7 @@ cd - > /dev/null # infer CUDA_VISIBLE_DEVICES=0 \ -python3 -u infer.py \ +python3 -u ${MAIN_ROOT}/infer.py \ --num_samples=10 \ --beam_size=300 \ --num_proc_bsearch=8 \ @@ -36,14 +34,14 @@ python3 -u infer.py \ --use_gru=True \ --use_gpu=False \ --share_rnn_weights=False \ ---infer_manifest='data/aishell/manifest.test' \ ---mean_std_path='models/aishell/mean_std.npz' \ ---vocab_path='models/aishell/vocab.txt' \ ---model_path='models/aishell' \ ---lang_model_path='models/lm/zh_giga.no_cna_cmn.prune01244.klm' \ ---decoding_method='ctc_beam_search' \ ---error_rate_type='cer' \ ---specgram_type='linear' +--infer_manifest="data/manifest.test" \ +--mean_std_path="${MAIN_ROOT}/models/aishell/mean_std.npz" \ +--vocab_path="${MAIN_ROOT}/models/aishell/vocab.txt" \ +--model_path="${MAIN_ROOT}/models/aishell" \ +--lang_model_path="${MAIN_ROOT}/models/lm/zh_giga.no_cna_cmn.prune01244.klm" \ +--decoding_method="ctc_beam_search" \ +--error_rate_type="cer" \ +--specgram_type="linear" if [ $? -ne 0 ]; then echo "Failed in inference!" diff --git a/examples/aishell/run_test.sh b/examples/aishell/local/run_test.sh similarity index 56% rename from examples/aishell/run_test.sh rename to examples/aishell/local/run_test.sh index b56d164b9..d2dbfb4f0 100644 --- a/examples/aishell/run_test.sh +++ b/examples/aishell/local/run_test.sh @@ -1,9 +1,7 @@ #! /usr/bin/env bash -cd ../.. > /dev/null - # download language model -cd models/lm > /dev/null +cd ${MAIN_ROOT}/models/lm > /dev/null bash download_lm_ch.sh if [ $? -ne 0 ]; then exit 1 @@ -13,7 +11,7 @@ cd - > /dev/null # evaluate model CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ -python3 -u test.py \ +python3 -u ${MAIN_ROOT}/test.py \ --batch_size=128 \ --beam_size=300 \ --num_proc_bsearch=8 \ @@ -27,14 +25,14 @@ python3 -u test.py \ --use_gru=True \ --use_gpu=True \ --share_rnn_weights=False \ ---test_manifest='data/aishell/manifest.test' \ ---mean_std_path='data/aishell/mean_std.npz' \ ---vocab_path='data/aishell/vocab.txt' \ ---model_path='checkpoints/aishell/step_final' \ ---lang_model_path='models/lm/zh_giga.no_cna_cmn.prune01244.klm' \ ---decoding_method='ctc_beam_search' \ ---error_rate_type='cer' \ ---specgram_type='linear' +--test_manifest="data/manifest.test" \ +--mean_std_path="data/mean_std.npz" \ +--vocab_path="data/vocab.txt" \ +--model_path="checkpoints/step_final" \ +--lang_model_path="${MAIN_ROOT}/models/lm/zh_giga.no_cna_cmn.prune01244.klm" \ +--decoding_method="ctc_beam_search" \ +--error_rate_type="cer" \ +--specgram_type="linear" if [ $? -ne 0 ]; then echo "Failed in evaluation!" diff --git a/examples/aishell/run_test_golden.sh b/examples/aishell/local/run_test_golden.sh similarity index 56% rename from examples/aishell/run_test_golden.sh rename to examples/aishell/local/run_test_golden.sh index 2f79cc647..062a1b99b 100644 --- a/examples/aishell/run_test_golden.sh +++ b/examples/aishell/local/run_test_golden.sh @@ -1,9 +1,7 @@ #! /usr/bin/env bash -cd ../.. > /dev/null - # download language model -cd models/lm > /dev/null +cd ${MAIN_ROOT}/models/lm > /dev/null bash download_lm_ch.sh if [ $? -ne 0 ]; then exit 1 @@ -12,7 +10,7 @@ cd - > /dev/null # download well-trained model -cd models/aishell > /dev/null +cd ${MAIN_ROOT}/models/aishell > /dev/null bash download_model.sh if [ $? 
-ne 0 ]; then exit 1 @@ -22,7 +20,7 @@ cd - > /dev/null # evaluate model CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ -python3 -u test.py \ +python3 -u ${MAIN_ROOT}/test.py \ --batch_size=128 \ --beam_size=300 \ --num_proc_bsearch=8 \ @@ -36,14 +34,14 @@ python3 -u test.py \ --use_gru=True \ --use_gpu=True \ --share_rnn_weights=False \ ---test_manifest='data/aishell/manifest.test' \ ---mean_std_path='models/aishell/mean_std.npz' \ ---vocab_path='models/aishell/vocab.txt' \ ---model_path='models/aishell' \ ---lang_model_path='models/lm/zh_giga.no_cna_cmn.prune01244.klm' \ ---decoding_method='ctc_beam_search' \ ---error_rate_type='cer' \ ---specgram_type='linear' +--test_manifest="data/manifest.test" \ +--mean_std_path="${MAIN_ROOT}/models/aishell/mean_std.npz" \ +--vocab_path="${MAIN_ROOT}/models/aishell/vocab.txt" \ +--model_path="${MAIN_ROOT}/models/aishell" \ +--lang_model_path="${MAIN_ROOT}/models/lm/zh_giga.no_cna_cmn.prune01244.klm" \ +--decoding_method="ctc_beam_search" \ +--error_rate_type="cer" \ +--specgram_type="linear" if [ $? -ne 0 ]; then echo "Failed in evaluation!" diff --git a/examples/aishell/run_train.sh b/examples/aishell/local/run_train.sh similarity index 61% rename from examples/aishell/run_train.sh rename to examples/aishell/local/run_train.sh index 889e19047..5bde13721 100644 --- a/examples/aishell/run_train.sh +++ b/examples/aishell/local/run_train.sh @@ -1,12 +1,10 @@ #! /usr/bin/env bash -cd ../.. > /dev/null - # train model # if you wish to resume from an exists model, uncomment --init_from_pretrained_model export FLAGS_sync_nccl_allreduce=0 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ -python3 -u train.py \ +python3 -u ${MAIN_ROOT}/train.py \ --batch_size=64 \ --num_epoch=50 \ --num_conv_layers=2 \ @@ -24,14 +22,14 @@ python3 -u train.py \ --use_gpu=True \ --is_local=True \ --share_rnn_weights=False \ ---train_manifest='data/aishell/manifest.train' \ ---dev_manifest='data/aishell/manifest.dev' \ ---mean_std_path='data/aishell/mean_std.npz' \ ---vocab_path='data/aishell/vocab.txt' \ ---output_model_dir='./checkpoints/aishell' \ ---augment_conf_path='conf/augmentation.config' \ ---specgram_type='linear' \ ---shuffle_method='batch_shuffle_clipped' \ +--train_manifest="data/manifest.train" \ +--dev_manifest="data/manifest.dev" \ +--mean_std_path="data/mean_std.npz" \ +--vocab_path="data/vocab.txt" \ +--output_model_dir="./checkpoints" \ +--augment_conf_path="${MAIN_ROOT}/conf/augmentation.config" \ +--specgram_type="linear" \ +--shuffle_method="batch_shuffle_clipped" \ if [ $? -ne 0 ]; then echo "Failed in training!" 
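All of the relocated `local/` scripts above resolve the repository root through `${MAIN_ROOT}` instead of `cd ../..`; the `path.sh` added below is what exports it. As a minimal sketch of the intended call pattern (directory and script names are taken from this diff, and the invocation order follows the `run.sh` added below):

```bash
cd examples/aishell
source path.sh              # exports MAIN_ROOT, PATH and PYTHONPATH
bash ./local/run_data.sh    # writes manifests, vocab.txt and mean_std.npz under ./data
bash ./local/run_train.sh   # writes checkpoints under ./checkpoints
```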
diff --git a/examples/aishell/path.sh b/examples/aishell/path.sh new file mode 100644 index 000000000..fd1cebba8 --- /dev/null +++ b/examples/aishell/path.sh @@ -0,0 +1,8 @@ +export MAIN_ROOT=${PWD}/../../ + +export PATH=${MAIN_ROOT}:${PWD}/tools:${PATH} +export LC_ALL=C + +# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C +export PYTHONIOENCODING=UTF-8 +export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH} diff --git a/examples/aishell/run.sh b/examples/aishell/run.sh new file mode 100644 index 000000000..93bf86388 --- /dev/null +++ b/examples/aishell/run.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +source path.sh + +# prepare data +bash ./local/run_data.sh + +# test pretrained model +bash ./local/run_test_golden.sh + +# infer with pretrained model +bash ./local/run_infer_golden.sh + +# train model +bash ./local/run_train.sh + +# test model +bash ./local/run_test.sh + +# infer model +bash ./local/run_infer.sh diff --git a/examples/baidu_en8k/path.sh b/examples/baidu_en8k/path.sh new file mode 100644 index 000000000..fd1cebba8 --- /dev/null +++ b/examples/baidu_en8k/path.sh @@ -0,0 +1,8 @@ +export MAIN_ROOT=${PWD}/../../ + +export PATH=${MAIN_ROOT}:${PWD}/tools:${PATH} +export LC_ALL=C + +# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C +export PYTHONIOENCODING=UTF-8 +export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH} diff --git a/examples/baidu_en8k/run_infer_golden.sh b/examples/baidu_en8k/run_infer_golden.sh index 85fea5f47..11d7541ee 100644 --- a/examples/baidu_en8k/run_infer_golden.sh +++ b/examples/baidu_en8k/run_infer_golden.sh @@ -1,9 +1,9 @@ #! /usr/bin/env bash -cd ../.. > /dev/null +source path.sh # download language model -cd models/lm > /dev/null +cd ${MAIN_ROOT}/models/lm > /dev/null bash download_lm_en.sh if [ $? -ne 0 ]; then exit 1 @@ -12,7 +12,7 @@ cd - > /dev/null # download well-trained model -cd models/baidu_en8k > /dev/null +cd ${MAIN_ROOT}/models/baidu_en8k > /dev/null bash download_model.sh if [ $? -ne 0 ]; then exit 1 @@ -22,7 +22,7 @@ cd - > /dev/null # infer CUDA_VISIBLE_DEVICES=0 \ -python3 -u infer.py \ +python3 -u ${MAIN_ROOT}/infer.py \ --num_samples=10 \ --beam_size=500 \ --num_proc_bsearch=5 \ @@ -36,14 +36,14 @@ python3 -u infer.py \ --use_gru=True \ --use_gpu=False \ --share_rnn_weights=False \ ---infer_manifest='data/librispeech/manifest.test-clean' \ ---mean_std_path='models/baidu_en8k/mean_std.npz' \ ---vocab_path='models/baidu_en8k/vocab.txt' \ ---model_path='models/baidu_en8k' \ ---lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \ ---decoding_method='ctc_beam_search' \ ---error_rate_type='wer' \ ---specgram_type='linear' +--infer_manifest="${MAIN_ROOT}/examples/librispeech/data/manifest.test-clean" \ +--mean_std_path="${MAIN_ROOT}/models/baidu_en8k/mean_std.npz" \ +--vocab_path="${MAIN_ROOT}/models/baidu_en8k/vocab.txt" \ +--model_path="${MAIN_ROOT}/models/baidu_en8k" \ +--lang_model_path="${MAIN_ROOT}/models/lm/common_crawl_00.prune01111.trie.klm" \ +--decoding_method="ctc_beam_search" \ +--error_rate_type="wer" \ +--specgram_type="linear" if [ $? -ne 0 ]; then echo "Failed in inference!" diff --git a/examples/baidu_en8k/run_test_golden.sh b/examples/baidu_en8k/run_test_golden.sh index 1ed633624..10c61a096 100644 --- a/examples/baidu_en8k/run_test_golden.sh +++ b/examples/baidu_en8k/run_test_golden.sh @@ -1,9 +1,9 @@ #! /usr/bin/env bash -cd ../.. > /dev/null +source path.sh # download language model -cd models/lm > /dev/null +cd ${MAIN_ROOT}/models/lm > /dev/null bash download_lm_en.sh if [ $?
-ne 0 ]; then exit 1 @@ -12,7 +12,7 @@ cd - > /dev/null # download well-trained model -cd models/baidu_en8k > /dev/null +cd ${MAIN_ROOT}/models/baidu_en8k > /dev/null bash download_model.sh if [ $? -ne 0 ]; then exit 1 @@ -22,7 +22,7 @@ cd - > /dev/null # evaluate model CUDA_VISIBLE_DEVICES=0,1,2,3 \ -python3 -u test.py \ +python3 -u ${MAIN_ROOT}/test.py \ --batch_size=128 \ --beam_size=500 \ --num_proc_bsearch=8 \ @@ -37,14 +37,14 @@ python3 -u test.py \ --use_gru=True \ --use_gpu=False \ --share_rnn_weights=False \ ---test_manifest='data/librispeech/manifest.test-clean' \ ---mean_std_path='models/baidu_en8k/mean_std.npz' \ ---vocab_path='models/baidu_en8k/vocab.txt' \ ---model_path='models/baidu_en8k' \ ---lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \ ---decoding_method='ctc_beam_search' \ ---error_rate_type='wer' \ ---specgram_type='linear' +--test_manifest="data/manifest.test-clean" \ +--mean_std_path="${MAIN_ROOT}/models/baidu_en8k/mean_std.npz" \ +--vocab_path="${MAIN_ROOT}/models/baidu_en8k/vocab.txt" \ +--model_path="${MAIN_ROOT}/models/baidu_en8k" \ +--lang_model_path="${MAIN_ROOT}/models/lm/common_crawl_00.prune01111.trie.klm" \ +--decoding_method="ctc_beam_search" \ +--error_rate_type="wer" \ +--specgram_type="linear" if [ $? -ne 0 ]; then echo "Failed in evaluation!" diff --git a/examples/deploy_demo/path.sh b/examples/deploy_demo/path.sh new file mode 100644 index 000000000..fd1cebba8 --- /dev/null +++ b/examples/deploy_demo/path.sh @@ -0,0 +1,8 @@ +export MAIN_ROOT=${PWD}/../../ + +export PATH=${MAIN_ROOT}:${PWD}/tools:${PATH} +export LC_ALL=C + +# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C +export PYTHONIOENCODING=UTF-8 +export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH} diff --git a/examples/deploy_demo/run_demo_client.sh b/examples/deploy_demo/run_demo_client.sh index 7c2609511..60581c661 100644 --- a/examples/deploy_demo/run_demo_client.sh +++ b/examples/deploy_demo/run_demo_client.sh @@ -1,11 +1,11 @@ #! /usr/bin/env bash -cd ../.. > /dev/null +source path.sh # start demo client CUDA_VISIBLE_DEVICES=0 \ -python3 -u deploy/demo_client.py \ ---host_ip='localhost' \ +python3 -u ${MAIN_ROOT}/deploy/demo_client.py \ +--host_ip="localhost" \ --host_port=8086 \ if [ $? -ne 0 ]; then diff --git a/examples/deploy_demo/run_english_demo_server.sh b/examples/deploy_demo/run_english_demo_server.sh index 5d86ab6e1..ae092dbce 100644 --- a/examples/deploy_demo/run_english_demo_server.sh +++ b/examples/deploy_demo/run_english_demo_server.sh @@ -1,10 +1,10 @@ #! /usr/bin/env bash # TODO: replace the model with a mandarin model -cd ../.. > /dev/null +source path.sh # download language model -cd models/lm > /dev/null +cd ${MAIN_ROOT}/models/lm > /dev/null bash download_lm_en.sh if [ $? -ne 0 ]; then exit 1 @@ -13,7 +13,7 @@ cd - > /dev/null # download well-trained model -cd models/baidu_en8k > /dev/null +cd ${MAIN_ROOT}/models/baidu_en8k > /dev/null bash download_model.sh if [ $? 
-ne 0 ]; then exit 1 @@ -23,8 +23,8 @@ cd - > /dev/null # start demo server CUDA_VISIBLE_DEVICES=0 \ -python3 -u deploy/demo_server.py \ ---host_ip='localhost' \ +python3 -u ${MAIN_ROOT}/deploy/demo_server.py \ +--host_ip="localhost" \ --host_port=8086 \ --num_conv_layers=2 \ --num_rnn_layers=3 \ @@ -36,14 +36,14 @@ python3 -u deploy/demo_server.py \ --use_gru=True \ --use_gpu=True \ --share_rnn_weights=False \ ---speech_save_dir='demo_cache' \ ---warmup_manifest='data/tiny/manifest.test-clean' \ ---mean_std_path='models/baidu_en8k/mean_std.npz' \ ---vocab_path='models/baidu_en8k/vocab.txt' \ ---model_path='models/baidu_en8k' \ ---lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \ ---decoding_method='ctc_beam_search' \ ---specgram_type='linear' +--speech_save_dir="demo_cache" \ +--warmup_manifest="${MAIN_ROOT}/examples/tiny/data/manifest.test-clean" \ +--mean_std_path="${MAIN_ROOT}/models/baidu_en8k/mean_std.npz" \ +--vocab_path="${MAIN_ROOT}/models/baidu_en8k/vocab.txt" \ +--model_path="${MAIN_ROOT}/models/baidu_en8k" \ +--lang_model_path="${MAIN_ROOT}/models/lm/common_crawl_00.prune01111.trie.klm" \ +--decoding_method="ctc_beam_search" \ +--specgram_type="linear" if [ $? -ne 0 ]; then echo "Failed in starting demo server!" diff --git a/data/librispeech/librispeech.py b/examples/librispeech/local/librispeech.py similarity index 90% rename from data/librispeech/librispeech.py rename to examples/librispeech/local/librispeech.py index 8a136453b..ae1bae2de 100644 --- a/data/librispeech/librispeech.py +++ b/examples/librispeech/local/librispeech.py @@ -1,3 +1,16 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Prepare Librispeech ASR datasets. Download, unpack and create manifest files. diff --git a/examples/librispeech/local/run_data.sh b/examples/librispeech/local/run_data.sh new file mode 100644 index 000000000..cbcad7b8d --- /dev/null +++ b/examples/librispeech/local/run_data.sh @@ -0,0 +1,45 @@ +#! /usr/bin/env bash + +mkdir -p data + +# download data, generate manifests +PYTHONPATH=.:$PYTHONPATH python3 local/librispeech.py \ +--manifest_prefix="data/manifest" \ +--target_dir="${MAIN_ROOT}/dataset/librispeech" \ +--full_download="True" + +if [ $? -ne 0 ]; then + echo "Prepare LibriSpeech failed. Terminated." + exit 1 +fi + +cat data/manifest.train-* | shuf > data/manifest.train + + +# build vocabulary +python3 ${MAIN_ROOT}/tools/build_vocab.py \ +--count_threshold=0 \ +--vocab_path="data/vocab.txt" \ +--manifest_paths="data/manifest.train" + +if [ $? -ne 0 ]; then + echo "Build vocabulary failed. Terminated." + exit 1 +fi + + +# compute mean and stddev for normalizer +python3 ${MAIN_ROOT}/tools/compute_mean_std.py \ +--manifest_path="data/manifest.train" \ +--num_samples=2000 \ +--specgram_type="linear" \ +--output_path="data/mean_std.npz" + +if [ $? -ne 0 ]; then + echo "Compute mean and stddev failed. Terminated." + exit 1 +fi + + +echo "LibriSpeech Data preparation done." 
+exit 0 diff --git a/examples/tiny/run_infer.sh b/examples/librispeech/local/run_infer.sh similarity index 54% rename from examples/tiny/run_infer.sh rename to examples/librispeech/local/run_infer.sh index d88f4526a..33959b381 100644 --- a/examples/tiny/run_infer.sh +++ b/examples/librispeech/local/run_infer.sh @@ -1,9 +1,7 @@ #! /usr/bin/env bash -cd ../.. > /dev/null - # download language model -cd models/lm > /dev/null +cd ${MAIN_ROOT}/models/lm > /dev/null bash download_lm_en.sh if [ $? -ne 0 ]; then exit 1 @@ -13,7 +11,7 @@ cd - > /dev/null # infer CUDA_VISIBLE_DEVICES=0 \ -python3 -u infer.py \ +python3 -u ${MAIN_ROOT}/infer.py \ --num_samples=10 \ --beam_size=500 \ --num_proc_bsearch=8 \ @@ -27,14 +25,14 @@ python3 -u infer.py \ --use_gru=False \ --use_gpu=True \ --share_rnn_weights=True \ ---infer_manifest='data/tiny/manifest.test-clean' \ ---mean_std_path='data/tiny/mean_std.npz' \ ---vocab_path='data/tiny/vocab.txt' \ ---model_path='./checkpoints/tiny/step_final' \ ---lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \ ---decoding_method='ctc_beam_search' \ ---error_rate_type='wer' \ ---specgram_type='linear' +--infer_manifest="data/manifest.test-clean" \ +--mean_std_path="data/mean_std.npz" \ +--vocab_path="data/vocab.txt" \ +--model_path="checkpoints/step_final" \ +--lang_model_path="${MAIN_ROOT}/models/lm/common_crawl_00.prune01111.trie.klm" \ +--decoding_method="ctc_beam_search" \ +--error_rate_type="wer" \ +--specgram_type="linear" if [ $? -ne 0 ]; then echo "Failed in inference!" diff --git a/examples/tiny/run_infer_golden.sh b/examples/librispeech/local/run_infer_golden.sh similarity index 54% rename from examples/tiny/run_infer_golden.sh rename to examples/librispeech/local/run_infer_golden.sh index d18c21f54..21663681b 100644 --- a/examples/tiny/run_infer_golden.sh +++ b/examples/librispeech/local/run_infer_golden.sh @@ -1,9 +1,7 @@ #! /usr/bin/env bash -cd ../.. > /dev/null - # download language model -cd models/lm > /dev/null +cd ${MAIN_ROOT}/models/lm > /dev/null bash download_lm_en.sh if [ $? -ne 0 ]; then exit 1 @@ -12,7 +10,7 @@ cd - > /dev/null # download well-trained model -cd models/librispeech > /dev/null +cd ${MAIN_ROOT}/models/librispeech > /dev/null bash download_model.sh if [ $? -ne 0 ]; then exit 1 @@ -22,7 +20,7 @@ cd - > /dev/null # infer CUDA_VISIBLE_DEVICES=0 \ -python3 -u infer.py \ +python3 -u ${MAIN_ROOT}/infer.py \ --num_samples=10 \ --beam_size=500 \ --num_proc_bsearch=8 \ @@ -36,14 +34,14 @@ python3 -u infer.py \ --use_gru=False \ --use_gpu=True \ --share_rnn_weights=True \ ---infer_manifest='data/tiny/manifest.test-clean' \ ---mean_std_path='models/librispeech/mean_std.npz' \ ---vocab_path='models/librispeech/vocab.txt' \ ---model_path='models/librispeech' \ ---lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \ ---decoding_method='ctc_beam_search' \ ---error_rate_type='wer' \ ---specgram_type='linear' +--infer_manifest="data/manifest.test-clean" \ +--mean_std_path="${MAIN_ROOT}/models/librispeech/mean_std.npz" \ +--vocab_path="${MAIN_ROOT}/models/librispeech/vocab.txt" \ +--model_path="${MAIN_ROOT}/models/librispeech" \ +--lang_model_path="${MAIN_ROOT}/models/lm/common_crawl_00.prune01111.trie.klm" \ +--decoding_method="ctc_beam_search" \ +--error_rate_type="wer" \ +--specgram_type="linear" if [ $? -ne 0 ]; then echo "Failed in inference!" 
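Each inference and test script begins by fetching the English language model into `${MAIN_ROOT}/models/lm`. When exercising several recipes in a row, the model can be fetched once up front; a sketch, under the assumption that `download_lm_en.sh` skips an already-completed download:

```bash
# One-off language model fetch, run from any example directory after path.sh.
source path.sh
(cd "${MAIN_ROOT}/models/lm" && bash download_lm_en.sh)
```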
diff --git a/examples/tiny/run_test.sh b/examples/librispeech/local/run_test.sh similarity index 56% rename from examples/tiny/run_test.sh rename to examples/librispeech/local/run_test.sh index 81eafe236..cd8c07542 100644 --- a/examples/tiny/run_test.sh +++ b/examples/librispeech/local/run_test.sh @@ -1,9 +1,7 @@ #! /usr/bin/env bash -cd ../.. > /dev/null - # download language model -cd models/lm > /dev/null +cd ${MAIN_ROOT}/models/lm > /dev/null bash download_lm_en.sh if [ $? -ne 0 ]; then exit 1 @@ -13,7 +11,7 @@ cd - > /dev/null # evaluate model CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ -python3 -u test.py \ +python3 -u ${MAIN_ROOT}/test.py \ --batch_size=128 \ --beam_size=500 \ --num_proc_bsearch=8 \ @@ -27,14 +25,14 @@ python3 -u test.py \ --use_gru=False \ --use_gpu=True \ --share_rnn_weights=True \ ---test_manifest='data/tiny/manifest.test-clean' \ ---mean_std_path='data/tiny/mean_std.npz' \ ---vocab_path='data/tiny/vocab.txt' \ ---model_path='checkpoints/tiny/step_final' \ ---lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \ ---decoding_method='ctc_beam_search' \ ---error_rate_type='wer' \ ---specgram_type='linear' +--test_manifest="data/manifest.test-clean" \ +--mean_std_path="data/mean_std.npz" \ +--vocab_path="data/vocab.txt" \ +--model_path="checkpoints/step_final" \ +--lang_model_path="${MAIN_ROOT}/models/lm/common_crawl_00.prune01111.trie.klm" \ +--decoding_method="ctc_beam_search" \ +--error_rate_type="wer" \ +--specgram_type="linear" if [ $? -ne 0 ]; then echo "Failed in evaluation!" diff --git a/examples/tiny/run_test_golden.sh b/examples/librispeech/local/run_test_golden.sh similarity index 55% rename from examples/tiny/run_test_golden.sh rename to examples/librispeech/local/run_test_golden.sh index d82865f42..54ec6ad03 100644 --- a/examples/tiny/run_test_golden.sh +++ b/examples/librispeech/local/run_test_golden.sh @@ -1,9 +1,7 @@ #! /usr/bin/env bash -cd ../.. > /dev/null - # download language model -cd models/lm > /dev/null +cd ${MAIN_ROOT}/models/lm > /dev/null bash download_lm_en.sh if [ $? -ne 0 ]; then exit 1 @@ -12,7 +10,7 @@ cd - > /dev/null # download well-trained model -cd models/librispeech > /dev/null +cd ${MAIN_ROOT}/models/librispeech > /dev/null bash download_model.sh if [ $? -ne 0 ]; then exit 1 @@ -22,7 +20,7 @@ cd - > /dev/null # evaluate model CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ -python3 -u test.py \ +python3 -u ${MAIN_ROOT}/test.py \ --batch_size=128 \ --beam_size=500 \ --num_proc_bsearch=8 \ @@ -36,14 +34,14 @@ python3 -u test.py \ --use_gru=False \ --use_gpu=True \ --share_rnn_weights=True \ ---test_manifest='data/tiny/manifest.test-clean' \ ---mean_std_path='models/librispeech/mean_std.npz' \ ---vocab_path='models/librispeech/vocab.txt' \ ---model_path='models/librispeech' \ ---lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \ ---decoding_method='ctc_beam_search' \ ---error_rate_type='wer' \ ---specgram_type='linear' +--test_manifest="data/manifest.test-clean" \ +--mean_std_path="${MAIN_ROOT}/models/librispeech/mean_std.npz" \ +--vocab_path="${MAIN_ROOT}/models/librispeech/vocab.txt" \ +--model_path="${MAIN_ROOT}/models/librispeech" \ +--lang_model_path="${MAIN_ROOT}/models/lm/common_crawl_00.prune01111.trie.klm" \ +--decoding_method="ctc_beam_search" \ +--error_rate_type="wer" \ +--specgram_type="linear" if [ $? -ne 0 ]; then echo "Failed in evaluation!" 
diff --git a/examples/librispeech/run_train.sh b/examples/librispeech/local/run_train.sh similarity index 60% rename from examples/librispeech/run_train.sh rename to examples/librispeech/local/run_train.sh index 6b8982d78..32aa2657b 100644 --- a/examples/librispeech/run_train.sh +++ b/examples/librispeech/local/run_train.sh @@ -1,13 +1,11 @@ #! /usr/bin/env bash -cd ../.. > /dev/null - # train model # if you wish to resume from an exists model, uncomment --init_from_pretrained_model export FLAGS_sync_nccl_allreduce=0 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ -python3 -u train.py \ +python3 -u ${MAIN_ROOT}/train.py \ --batch_size=20 \ --num_epoch=50 \ --num_conv_layers=2 \ @@ -25,14 +23,14 @@ python3 -u train.py \ --use_gpu=True \ --is_local=True \ --share_rnn_weights=True \ ---train_manifest='data/librispeech/manifest.train' \ ---dev_manifest='data/librispeech/manifest.dev-clean' \ ---mean_std_path='data/librispeech/mean_std.npz' \ ---vocab_path='data/librispeech/vocab.txt' \ ---output_model_dir='./checkpoints/libri' \ ---augment_conf_path='conf/augmentation.config' \ ---specgram_type='linear' \ ---shuffle_method='batch_shuffle_clipped' \ +--train_manifest="data/manifest.train" \ +--dev_manifest="data/manifest.dev-clean" \ +--mean_std_path="data/mean_std.npz" \ +--vocab_path="data/vocab.txt" \ +--output_model_dir="./checkpoints" \ +--augment_conf_path="${MAIN_ROOT}/conf/augmentation.config" \ +--specgram_type="linear" \ +--shuffle_method="batch_shuffle_clipped" \ if [ $? -ne 0 ]; then echo "Failed in training!" diff --git a/examples/librispeech/run_tune.sh b/examples/librispeech/local/run_tune.sh similarity index 59% rename from examples/librispeech/run_tune.sh rename to examples/librispeech/local/run_tune.sh index 834f060fa..848f0b8f9 100644 --- a/examples/librispeech/run_tune.sh +++ b/examples/librispeech/local/run_tune.sh @@ -1,10 +1,8 @@ #! /usr/bin/env bash -cd ../.. > /dev/null - # grid-search for hyper-parameters in language model CUDA_VISIBLE_DEVICES=0,1,2,3 \ -python3 -u tools/tune.py \ +python3 -u ${MAIN_ROOT}/tools/tune.py \ --num_batches=-1 \ --batch_size=128 \ --beam_size=500 \ @@ -23,13 +21,13 @@ python3 -u tools/tune.py \ --use_gru=False \ --use_gpu=True \ --share_rnn_weights=True \ ---tune_manifest='data/librispeech/manifest.dev-clean' \ ---mean_std_path='data/librispeech/mean_std.npz' \ ---vocab_path='models/librispeech/vocab.txt' \ ---model_path='models/librispeech' \ ---lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \ ---error_rate_type='wer' \ ---specgram_type='linear' +--tune_manifest="data/manifest.dev-clean" \ +--mean_std_path="data/mean_std.npz" \ +--vocab_path="${MAIN_ROOT}/models/librispeech/vocab.txt" \ +--model_path="${MAIN_ROOT}/models/librispeech" \ +--lang_model_path="${MAIN_ROOT}/models/lm/common_crawl_00.prune01111.trie.klm" \ +--error_rate_type="wer" \ +--specgram_type="linear" if [ $? -ne 0 ]; then echo "Failed in tuning!"
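Because every relocated script now builds its paths from `${MAIN_ROOT}`, running one without sourcing `path.sh` first silently yields paths rooted at `/` (for example, `${MAIN_ROOT}/tools/tune.py` becomes `/tools/tune.py`). The guard below is not part of this change; it is only a defensive sketch of how a `local/` script could fail fast:

```bash
# Abort with a clear message if the caller forgot to source path.sh.
: "${MAIN_ROOT:?MAIN_ROOT is unset - source path.sh before running this script}"
```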
diff --git a/examples/librispeech/path.sh b/examples/librispeech/path.sh new file mode 100644 index 000000000..fd1cebba8 --- /dev/null +++ b/examples/librispeech/path.sh @@ -0,0 +1,8 @@ +export MAIN_ROOT=${PWD}/../../ + +export PATH=${MAIN_ROOT}:${PWD}/tools:${PATH} +export LC_ALL=C + +# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C +export PYTHONIOENCODING=UTF-8 +export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH} diff --git a/examples/librispeech/run.sh b/examples/librispeech/run.sh new file mode 100644 index 000000000..c8e589139 --- /dev/null +++ b/examples/librispeech/run.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +source path.sh + +# prepare data +bash ./local/run_data.sh + +# test pretrained model +bash ./local/run_test_golden.sh + +# infer with pretrained model +bash ./local/run_infer_golden.sh + +# train model +bash ./local/run_train.sh + +# test model +bash ./local/run_test.sh + +# infer model +bash ./local/run_infer.sh + +# tune model +bash ./local/run_tune.sh diff --git a/examples/librispeech/run_data.sh b/examples/librispeech/run_data.sh deleted file mode 100644 index 788b415de..000000000 --- a/examples/librispeech/run_data.sh +++ /dev/null @@ -1,45 +0,0 @@ -#! /usr/bin/env bash - -cd ../.. > /dev/null - -# download data, generate manifests -PYTHONPATH=.:$PYTHONPATH python3 data/librispeech/librispeech.py \ ---manifest_prefix='data/librispeech/manifest' \ ---target_dir='./dataset/librispeech' \ ---full_download='True' - -if [ $? -ne 0 ]; then - echo "Prepare LibriSpeech failed. Terminated." - exit 1 -fi - -cat data/librispeech/manifest.train-* | shuf > data/librispeech/manifest.train - - -# build vocabulary -python3 tools/build_vocab.py \ ---count_threshold=0 \ ---vocab_path='data/librispeech/vocab.txt' \ ---manifest_paths='data/librispeech/manifest.train' - -if [ $? -ne 0 ]; then - echo "Build vocabulary failed. Terminated." - exit 1 -fi - - -# compute mean and stddev for normalizer -python3 tools/compute_mean_std.py \ ---manifest_path='data/librispeech/manifest.train' \ ---num_samples=2000 \ ---specgram_type='linear' \ ---output_path='data/librispeech/mean_std.npz' - -if [ $? -ne 0 ]; then - echo "Compute mean and stddev failed. Terminated." - exit 1 -fi - - -echo "LibriSpeech Data preparation done." -exit 0 diff --git a/examples/tiny/README.md b/examples/tiny/README.md new file mode 100644 index 000000000..d7361b263 --- /dev/null +++ b/examples/tiny/README.md @@ -0,0 +1,42 @@ +# Tiny Example + +1. `source path.sh` +2. `bash run.sh` + +## Steps +- Prepare the data + + ```bash + sh local/run_data.sh + ``` + + `run_data.sh` will download the dataset, generate manifests, collect the normalizer's statistics and build the vocabulary. Once the data preparation is done, you will find the data (only part of LibriSpeech) downloaded in `${MAIN_ROOT}/dataset/librispeech` and the corresponding manifest files generated in `${PWD}/data`, as well as a mean/stddev file and a vocabulary file. It only needs to be run once per dataset; the outputs are reused by all further experiments. +- Train your own ASR model + + ```bash + sh local/run_train.sh + ``` + + `run_train.sh` will start a training job, with training logs printed to stdout and a model checkpoint saved to `${PWD}/checkpoints` at the end of every pass/epoch. These checkpoints can be used to resume training, or for inference, evaluation and deployment. +- Run inference with an existing model + + ```bash + sh local/run_infer.sh + ``` + + `run_infer.sh` will print speech-to-text decoding results for several (default: 10) samples using the trained model. The performance is likely to be poor at this stage, since the current model is trained on only a toy subset of LibriSpeech. To see results from a better model, you can download a well-trained model (trained for several days on the complete LibriSpeech corpus) and run inference with it: + + ```bash + sh local/run_infer_golden.sh + ``` +- Evaluate an existing model + + ```bash + sh local/run_test.sh + ``` + + `run_test.sh` will evaluate the model using the Word Error Rate (or Character Error Rate) metric. Similarly, you can also download a well-trained model and test its performance: + + ```bash + sh local/run_test_golden.sh + ``` \ No newline at end of file diff --git a/examples/tiny/local/run_data.sh b/examples/tiny/local/run_data.sh new file mode 100644 index 000000000..3ad387dbc --- /dev/null +++ b/examples/tiny/local/run_data.sh @@ -0,0 +1,47 @@ +#! /usr/bin/env bash + +# prepare folder +if [ ! -e data ]; then + mkdir data +fi + +# download data, generate manifests +PYTHONPATH=.:$PYTHONPATH python3 ../librispeech/local/librispeech.py \ +--manifest_prefix="data/manifest" \ +--target_dir="${MAIN_ROOT}/dataset/librispeech" \ +--full_download="False" + +if [ $? -ne 0 ]; then + echo "Prepare LibriSpeech failed. Terminated." + exit 1 +fi + +head -n 64 data/manifest.dev-clean > data/manifest.tiny + +# build vocabulary +python3 ${MAIN_ROOT}/tools/build_vocab.py \ +--count_threshold=0 \ +--vocab_path="data/vocab.txt" \ +--manifest_paths="data/manifest.tiny" + +if [ $? -ne 0 ]; then + echo "Build vocabulary failed. Terminated." + exit 1 +fi + + +# compute mean and stddev for normalizer +python3 ${MAIN_ROOT}/tools/compute_mean_std.py \ +--manifest_path="data/manifest.tiny" \ +--num_samples=64 \ +--specgram_type="linear" \ +--output_path="data/mean_std.npz" + +if [ $? -ne 0 ]; then + echo "Compute mean and stddev failed. Terminated." + exit 1 +fi + + +echo "LibriSpeech Data preparation done." +exit 0 diff --git a/examples/librispeech/run_infer.sh b/examples/tiny/local/run_infer.sh similarity index 53% rename from examples/librispeech/run_infer.sh rename to examples/tiny/local/run_infer.sh index 2452b45e3..bbaa094e9 100644 --- a/examples/librispeech/run_infer.sh +++ b/examples/tiny/local/run_infer.sh @@ -1,9 +1,7 @@ #! /usr/bin/env bash -cd ../.. > /dev/null - # download language model -cd models/lm > /dev/null +cd $MAIN_ROOT/models/lm > /dev/null bash download_lm_en.sh if [ $? -ne 0 ]; then exit 1 @@ -13,7 +11,7 @@ cd - > /dev/null # infer CUDA_VISIBLE_DEVICES=0 \ -python3 -u infer.py \ +python3 -u $MAIN_ROOT/infer.py \ --num_samples=10 \ --beam_size=500 \ --num_proc_bsearch=8 \ @@ -27,14 +25,14 @@ python3 -u infer.py \ --use_gru=False \ --use_gpu=True \ --share_rnn_weights=True \ ---infer_manifest='data/librispeech/manifest.test-clean' \ ---mean_std_path='data/librispeech/mean_std.npz' \ ---vocab_path='data/librispeech/vocab.txt' \ ---model_path='checkpoints/libri/step_final' \ ---lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \ ---decoding_method='ctc_beam_search' \ ---error_rate_type='wer' \ ---specgram_type='linear' +--infer_manifest="data/manifest.test-clean" \ +--mean_std_path="data/mean_std.npz" \ +--vocab_path="data/vocab.txt" \ +--model_path="checkpoints/step_final" \ +--lang_model_path="$MAIN_ROOT/models/lm/common_crawl_00.prune01111.trie.klm" \ +--decoding_method="ctc_beam_search" \ +--error_rate_type="wer" \ +--specgram_type="linear" if [ $? -ne 0 ]; then echo "Failed in inference!"
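After `local/run_data.sh` completes, the tiny example's outputs can be spot-checked directly. The paths below come from this diff; treating the manifest as one JSON record per utterance is an assumption about the generated format:

```bash
head -n 1 data/manifest.tiny    # first utterance record (format assumed)
wc -l data/vocab.txt            # vocabulary size, one character per line
python3 -c "import numpy as np; print(np.load('data/mean_std.npz').files)"
```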
diff --git a/examples/librispeech/run_infer_golden.sh b/examples/tiny/local/run_infer_golden.sh similarity index 54% rename from examples/librispeech/run_infer_golden.sh rename to examples/tiny/local/run_infer_golden.sh index 6004c6af6..21663681b 100644 --- a/examples/librispeech/run_infer_golden.sh +++ b/examples/tiny/local/run_infer_golden.sh @@ -1,9 +1,7 @@ #! /usr/bin/env bash -cd ../.. > /dev/null - # download language model -cd models/lm > /dev/null +cd ${MAIN_ROOT}/models/lm > /dev/null bash download_lm_en.sh if [ $? -ne 0 ]; then exit 1 @@ -12,7 +10,7 @@ cd - > /dev/null # download well-trained model -cd models/librispeech > /dev/null +cd ${MAIN_ROOT}/models/librispeech > /dev/null bash download_model.sh if [ $? -ne 0 ]; then exit 1 @@ -22,7 +20,7 @@ cd - > /dev/null # infer CUDA_VISIBLE_DEVICES=0 \ -python3 -u infer.py \ +python3 -u ${MAIN_ROOT}/infer.py \ --num_samples=10 \ --beam_size=500 \ --num_proc_bsearch=8 \ @@ -36,14 +34,14 @@ python3 -u infer.py \ --use_gru=False \ --use_gpu=True \ --share_rnn_weights=True \ ---infer_manifest='data/librispeech/manifest.test-clean' \ ---mean_std_path='models/librispeech/mean_std.npz' \ ---vocab_path='models/librispeech/vocab.txt' \ ---model_path='models/librispeech' \ ---lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \ ---decoding_method='ctc_beam_search' \ ---error_rate_type='wer' \ ---specgram_type='linear' +--infer_manifest="data/manifest.test-clean" \ +--mean_std_path="${MAIN_ROOT}/models/librispeech/mean_std.npz" \ +--vocab_path="${MAIN_ROOT}/models/librispeech/vocab.txt" \ +--model_path="${MAIN_ROOT}/models/librispeech" \ +--lang_model_path="${MAIN_ROOT}/models/lm/common_crawl_00.prune01111.trie.klm" \ +--decoding_method="ctc_beam_search" \ +--error_rate_type="wer" \ +--specgram_type="linear" if [ $? -ne 0 ]; then echo "Failed in inference!" diff --git a/examples/librispeech/run_test.sh b/examples/tiny/local/run_test.sh similarity index 55% rename from examples/librispeech/run_test.sh rename to examples/tiny/local/run_test.sh index e8e65c39e..ef1fa5a2d 100644 --- a/examples/librispeech/run_test.sh +++ b/examples/tiny/local/run_test.sh @@ -1,9 +1,7 @@ #! /usr/bin/env bash -cd ../.. > /dev/null - # download language model -cd models/lm > /dev/null +cd $MAIN_ROOT/models/lm > /dev/null bash download_lm_en.sh if [ $? -ne 0 ]; then exit 1 @@ -13,7 +11,7 @@ cd - > /dev/null # evaluate model CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ -python3 -u test.py \ +python3 -u $MAIN_ROOT/test.py \ --batch_size=128 \ --beam_size=500 \ --num_proc_bsearch=8 \ @@ -27,14 +25,14 @@ python3 -u test.py \ --use_gru=False \ --use_gpu=True \ --share_rnn_weights=True \ ---test_manifest='data/librispeech/manifest.test-clean' \ ---mean_std_path='data/librispeech/mean_std.npz' \ ---vocab_path='data/librispeech/vocab.txt' \ ---model_path='checkpoints/libri/step_final' \ ---lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \ ---decoding_method='ctc_beam_search' \ ---error_rate_type='wer' \ ---specgram_type='linear' +--test_manifest="data/manifest.test-clean" \ +--mean_std_path="data/mean_std.npz" \ +--vocab_path="data/vocab.txt" \ +--model_path="checkpoints/step_final" \ +--lang_model_path="$MAIN_ROOT/models/lm/common_crawl_00.prune01111.trie.klm" \ +--decoding_method="ctc_beam_search" \ +--error_rate_type="wer" \ +--specgram_type="linear" if [ $? -ne 0 ]; then echo "Failed in evaluation!" 
diff --git a/examples/librispeech/run_test_golden.sh b/examples/tiny/local/run_test_golden.sh similarity index 55% rename from examples/librispeech/run_test_golden.sh rename to examples/tiny/local/run_test_golden.sh index 1db36758a..9983fade8 100644 --- a/examples/librispeech/run_test_golden.sh +++ b/examples/tiny/local/run_test_golden.sh @@ -1,9 +1,7 @@ #! /usr/bin/env bash -cd ../.. > /dev/null - # download language model -cd models/lm > /dev/null +cd $MAIN_ROOT/models/lm > /dev/null bash download_lm_en.sh if [ $? -ne 0 ]; then exit 1 @@ -12,7 +10,7 @@ cd - > /dev/null # download well-trained model -cd models/librispeech > /dev/null +cd $MAIN_ROOT/models/librispeech > /dev/null bash download_model.sh if [ $? -ne 0 ]; then exit 1 @@ -22,7 +20,7 @@ cd - > /dev/null # evaluate model CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ -python3 -u test.py \ +python3 -u $MAIN_ROOT/test.py \ --batch_size=128 \ --beam_size=500 \ --num_proc_bsearch=8 \ @@ -36,14 +34,14 @@ python3 -u test.py \ --use_gru=False \ --use_gpu=True \ --share_rnn_weights=True \ ---test_manifest='data/librispeech/manifest.test-clean' \ ---mean_std_path='models/librispeech/mean_std.npz' \ ---vocab_path='models/librispeech/vocab.txt' \ ---model_path='models/librispeech' \ ---lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \ ---decoding_method='ctc_beam_search' \ ---error_rate_type='wer' \ ---specgram_type='linear' +--test_manifest="data/manifest.test-clean" \ +--mean_std_path="$MAIN_ROOT/models/librispeech/mean_std.npz" \ +--vocab_path="$MAIN_ROOT/models/librispeech/vocab.txt" \ +--model_path="$MAIN_ROOT/models/librispeech" \ +--lang_model_path="$MAIN_ROOT/models/lm/common_crawl_00.prune01111.trie.klm" \ +--decoding_method="ctc_beam_search" \ +--error_rate_type="wer" \ +--specgram_type="linear" if [ $? -ne 0 ]; then echo "Failed in evaluation!" diff --git a/examples/tiny/run_train.sh b/examples/tiny/local/run_train.sh similarity index 62% rename from examples/tiny/run_train.sh rename to examples/tiny/local/run_train.sh index fe5b62030..de9dcbd74 100644 --- a/examples/tiny/run_train.sh +++ b/examples/tiny/local/run_train.sh @@ -1,12 +1,10 @@ #! /usr/bin/env bash -cd ../.. > /dev/null - # train model # if you wish to resume from an exists model, uncomment --init_from_pretrained_model export FLAGS_sync_nccl_allreduce=0 CUDA_VISIBLE_DEVICES=0,1,2,3 \ -python3 -u train.py \ +python3 -u ${MAIN_ROOT}/train.py \ --batch_size=4 \ --num_epoch=20 \ --num_conv_layers=2 \ @@ -24,14 +22,14 @@ python3 -u train.py \ --use_gpu=True \ --is_local=True \ --share_rnn_weights=True \ ---train_manifest='data/tiny/manifest.tiny' \ ---dev_manifest='data/tiny/manifest.tiny' \ ---mean_std_path='data/tiny/mean_std.npz' \ ---vocab_path='data/tiny/vocab.txt' \ ---output_model_dir='./checkpoints/tiny' \ ---augment_conf_path='conf/augmentation.config' \ ---specgram_type='linear' \ ---shuffle_method='batch_shuffle_clipped' \ +--train_manifest="data/manifest.tiny" \ +--dev_manifest="data/manifest.tiny" \ +--mean_std_path="data/mean_std.npz" \ +--vocab_path="data/vocab.txt" \ +--output_model_dir="./checkpoints/" \ +--augment_conf_path="${MAIN_ROOT}/conf/augmentation.config" \ +--specgram_type="linear" \ +--shuffle_method="batch_shuffle_clipped" \ if [ $? -ne 0 ]; then echo "Failed in training!" 
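The relocated `local/run_test.sh` and `local/run_infer.sh` scripts both load `checkpoints/step_final`, which only exists once `local/run_train.sh` has finished. A hypothetical pre-flight check (the checkpoint name is taken from this diff):

```bash
# Run from the example directory before local/run_test.sh or local/run_infer.sh.
if [ ! -e checkpoints/step_final ]; then
    echo "No final checkpoint found - run: bash ./local/run_train.sh"
    exit 1
fi
```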
diff --git a/examples/tiny/run_tune.sh b/examples/tiny/local/run_tune.sh similarity index 61% rename from examples/tiny/run_tune.sh rename to examples/tiny/local/run_tune.sh index bec71111a..b5cc4d6a1 100644 --- a/examples/tiny/run_tune.sh +++ b/examples/tiny/local/run_tune.sh @@ -1,10 +1,8 @@ #! /usr/bin/env bash -cd ../.. > /dev/null - # grid-search for hyper-parameters in language model CUDA_VISIBLE_DEVICES=0,1,2,3 \ -python3 -u tools/tune.py \ +python3 -u $MAIN_ROOT/tools/tune.py \ --num_batches=-1 \ --batch_size=128 \ --beam_size=500 \ @@ -23,13 +21,13 @@ python3 -u tools/tune.py \ --use_gru=False \ --use_gpu=True \ --share_rnn_weights=True \ ---tune_manifest='data/tiny/manifest.dev-clean' \ ---mean_std_path='data/tiny/mean_std.npz' \ ---vocab_path='data/tiny/vocab.txt' \ ---model_path='models/librispeech' \ ---lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \ ---error_rate_type='wer' \ ---specgram_type='linear' +--tune_manifest="data/manifest.dev-clean" \ +--mean_std_path="data/mean_std.npz" \ +--vocab_path="data/vocab.txt" \ +--model_path="$MAIN_ROOT/models/librispeech" \ +--lang_model_path="$MAIN_ROOT/models/lm/common_crawl_00.prune01111.trie.klm" \ +--error_rate_type="wer" \ +--specgram_type="linear" if [ $? -ne 0 ]; then echo "Failed in tuning!" diff --git a/examples/tiny/path.sh b/examples/tiny/path.sh new file mode 100644 index 000000000..fd1cebba8 --- /dev/null +++ b/examples/tiny/path.sh @@ -0,0 +1,8 @@ +export MAIN_ROOT=${PWD}/../../ + +export PATH=${MAIN_ROOT}:${PWD}/tools:${PATH} +export LC_ALL=C + +# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C +export PYTHONIOENCODING=UTF-8 +export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH} diff --git a/examples/tiny/run.sh b/examples/tiny/run.sh new file mode 100644 index 000000000..c8e589139 --- /dev/null +++ b/examples/tiny/run.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +source path.sh + +# prepare data +bash ./local/run_data.sh + +# test pretrained model +bash ./local/run_test_golden.sh + +# infer with pretrained model +bash ./local/run_infer_golden.sh + +# train model +bash ./local/run_train.sh + +# test model +bash ./local/run_test.sh + +# infer model +bash ./local/run_infer.sh + +# tune model +bash ./local/run_tune.sh diff --git a/examples/tiny/run_data.sh b/examples/tiny/run_data.sh deleted file mode 100644 index dd75ddadc..000000000 --- a/examples/tiny/run_data.sh +++ /dev/null @@ -1,49 +0,0 @@ -#! /usr/bin/env bash - -cd ../.. > /dev/null - -# prepare folder -if [ ! -e data/tiny ]; then - mkdir data/tiny -fi - -# download data, generate manifests -PYTHONPATH=.:$PYTHONPATH python3 data/librispeech/librispeech.py \ ---manifest_prefix='data/tiny/manifest' \ ---target_dir='./dataset/librispeech' \ ---full_download='False' - -if [ $? -ne 0 ]; then - echo "Prepare LibriSpeech failed. Terminated." - exit 1 -fi - -head -n 64 data/tiny/manifest.dev-clean > data/tiny/manifest.tiny - -# build vocabulary -python3 tools/build_vocab.py \ ---count_threshold=0 \ ---vocab_path='data/tiny/vocab.txt' \ ---manifest_paths='data/tiny/manifest.tiny' - -if [ $? -ne 0 ]; then - echo "Build vocabulary failed. Terminated." - exit 1 -fi - - -# compute mean and stddev for normalizer -python3 tools/compute_mean_std.py \ ---manifest_path='data/tiny/manifest.tiny' \ ---num_samples=64 \ ---specgram_type='linear' \ ---output_path='data/tiny/mean_std.npz' - -if [ $? -ne 0 ]; then - echo "Compute mean and stddev failed. Terminated." - exit 1 -fi - - -echo "LibriSpeech Data preparation done."
-exit 0 diff --git a/infer.py b/infer.py index 910140282..ffcb48eb6 100644 --- a/infer.py +++ b/infer.py @@ -1,3 +1,16 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Inferer for DeepSpeech2 model.""" import sys diff --git a/model_utils/__init__.py b/model_utils/__init__.py index e69de29bb..185a92b8d 100644 --- a/model_utils/__init__.py +++ b/model_utils/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/model_utils/model.py b/model_utils/model.py index 68b963cdf..f4555bd69 100644 --- a/model_utils/model.py +++ b/model_utils/model.py @@ -1,3 +1,16 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Contains DeepSpeech2 model.""" import sys diff --git a/model_utils/network.py b/model_utils/network.py index b8812e609..19f9d887c 100644 --- a/model_utils/network.py +++ b/model_utils/network.py @@ -1,3 +1,17 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import collections import paddle.fluid as fluid import numpy as np diff --git a/test.py b/test.py index 053a43acd..d3b601e98 100644 --- a/test.py +++ b/test.py @@ -1,3 +1,16 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Evaluation for DeepSpeech2 model.""" import argparse diff --git a/tools/_init_paths.py b/tools/_init_paths.py index 3bb2fd197..c4b28c643 100644 --- a/tools/_init_paths.py +++ b/tools/_init_paths.py @@ -1,3 +1,16 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Set up paths for DS2""" import os.path diff --git a/tools/build_vocab.py b/tools/build_vocab.py index 13ab843c4..77fd1fb63 100644 --- a/tools/build_vocab.py +++ b/tools/build_vocab.py @@ -1,3 +1,16 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Build vocabulary from manifest files. Each item in vocabulary file is a character. diff --git a/tools/compute_mean_std.py b/tools/compute_mean_std.py index 203f83a2c..d40739f32 100644 --- a/tools/compute_mean_std.py +++ b/tools/compute_mean_std.py @@ -1,3 +1,16 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Compute mean and std for feature normalizer, and save to file.""" import argparse diff --git a/tools/tune.py b/tools/tune.py index 74007f55a..36443e28b 100644 --- a/tools/tune.py +++ b/tools/tune.py @@ -1,3 +1,16 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Beam search parameters tuning for DeepSpeech2 model.""" import sys diff --git a/train.py b/train.py index caa7c266e..067f6d786 100644 --- a/train.py +++ b/train.py @@ -1,3 +1,16 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Trainer for DeepSpeech2 model.""" import argparse diff --git a/utils/__init__.py b/utils/__init__.py index e69de29bb..185a92b8d 100644 --- a/utils/__init__.py +++ b/utils/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/utils/error_rate.py b/utils/error_rate.py index 628a84cb4..d80546ee2 100644 --- a/utils/error_rate.py +++ b/utils/error_rate.py @@ -1,3 +1,16 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """This module provides functions to calculate error rate in different level. e.g. wer for word-level, cer for char-level. """ diff --git a/utils/tests/test_error_rate.py b/utils/tests/test_error_rate.py index efa04b827..80c5b192a 100644 --- a/utils/tests/test_error_rate.py +++ b/utils/tests/test_error_rate.py @@ -1,3 +1,16 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Test error rate.""" import unittest diff --git a/utils/utility.py b/utils/utility.py index 543f3ebce..cd7166593 100644 --- a/utils/utility.py +++ b/utils/utility.py @@ -1,3 +1,16 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Contains common utility functions.""" import distutils.util
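Since this change stamps the Apache-2.0 header onto Python sources across the tree, a quick spot check can list any `.py` files that were missed. This is a verification sketch, not part of the change itself:

```bash
# -L prints the names of files containing no matching line; run from the repo root.
grep -rL "Licensed under the Apache License" --include="*.py" .
```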