Merge branch 'develop' of upstream into fix_docker_doc

pull/8/head · Yibing Liu · 7 years ago · commit f862e0c646

@@ -1,7 +1,7 @@
 #!/usr/bin/env bash
 set -e
-readonly VERSION="3.8"
+readonly VERSION="3.9"
 version=$(clang-format -version)

@@ -33,11 +33,3 @@
         entry: bash .clang_format.hook -i
         language: system
         files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|cuh|proto)$
--   repo: local
-    hooks:
-    -   id: convert-markdown-into-html
-        name: convert-markdown-into-html
-        description: Convert README.md into index.html
-        entry: python .pre-commit-hooks/convert_markdown_into_html.py
-        language: system
-        files: .+README\.md$

@@ -16,21 +16,18 @@ addons:
       - python
       - python-pip
       - python2.7-dev
   ssh_known_hosts: 52.76.173.135
 before_install:
   - sudo pip install -U virtualenv pre-commit pip
   - docker pull paddlepaddle/paddle:latest
 script:
-  - .travis/precommit.sh
-  - docker run -i --rm -v "$PWD:/py_unittest" paddlepaddle/paddle:latest /bin/bash -c
-    'cd /py_unittest; sh .travis/unittest.sh'
-  - |
-    if [[ "$TRAVIS_PULL_REQUEST" != "false" ]]; then exit 0; fi;
-    if [[ "$TRAVIS_BRANCH" != "develop" && ! "$TRAVIS_BRANCH" =~ ^v[[:digit:]]+\.[[:digit:]]+(\.[[:digit:]]+)?(-\S*)?$ ]]; then echo "not develop branch, no deploy"; exit 0; fi;
-    export DEPLOY_DOCS_SH=https://raw.githubusercontent.com/PaddlePaddle/PaddlePaddle.org/master/scripts/deploy/deploy_docs.sh
-    export MODELS_DIR=`pwd`
-    cd ..
-    curl $DEPLOY_DOCS_SH | bash -s $CONTENT_DEC_PASSWD $TRAVIS_BRANCH $MODELS_DIR
+  - exit_code=0
+  - .travis/precommit.sh || exit_code=$(( exit_code | $? ))
+  - docker run -i --rm -v "$PWD:/py_unittest" paddlepaddle/paddle:latest /bin/bash -c
+    'cd /py_unittest; sh .travis/unittest.sh' || exit_code=$(( exit_code | $? ))
+  - exit $exit_code
 notifications:
   email:
     on_success: change
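
The rewritten `script` block above ORs each step's exit status into a single accumulator, so every step runs and a pre-commit failure can no longer mask a unit-test failure (or vice versa). A minimal standalone sketch of the pattern, with hypothetical step names:

```bash
#!/usr/bin/env bash
exit_code=0

step_lint()  { true; }    # hypothetical first step
step_tests() { false; }   # hypothetical second step

# Run every step; OR each non-zero status into the accumulator.
step_lint  || exit_code=$(( exit_code | $? ))
step_tests || exit_code=$(( exit_code | $? ))

exit $exit_code           # non-zero if any step failed
```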

@@ -24,13 +24,6 @@ unittest(){
 trap 'abort' 0
 set -e

-for proj in */ ; do
-    if [ -d $proj ]; then
-        unittest $proj
-        if [ $? != 0 ]; then
-            exit 1
-        fi
-    fi
-done
+unittest .

 trap : 0
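
For context, the `trap` pair surrounding the deleted loop is the usual bash guard: an EXIT trap reports failure, and `trap : 0` replaces it with a no-op once everything has passed. A minimal sketch of that pattern (the `run_tests` body is a placeholder):

```bash
#!/usr/bin/env bash
abort() {
    echo "Unit tests failed." 1>&2
}

trap 'abort' 0   # fire abort() on any exit...
set -e           # ...and exit on the first failing command

run_tests() { true; }   # placeholder for the real test runner
run_tests

trap : 0         # reached only on success: disarm the trap
```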

@@ -29,10 +29,17 @@ To avoid the trouble of environment setup, [running in Docker container](#runnin
 - PaddlePaddle the latest version (please refer to the [Installation Guide](https://github.com/PaddlePaddle/Paddle#installation))

 ### Setup
+- Make sure these libraries or tools installed: `pkg-config`, `flac`, `ogg`, `vorbis` and `swig`, e.g. installing them via `apt-get`:
 ```bash
-git clone https://github.com/PaddlePaddle/models.git
-cd models/deep_speech_2
+sudo apt-get install -y pkg-config libflac-dev libogg-dev libvorbis-dev swig
 ```
+- Run the setup script for the remaining dependencies
+```bash
+git clone https://github.com/PaddlePaddle/DeepSpeech.git
+cd DeepSpeech
 sh setup.sh
 ```
@@ -479,9 +486,9 @@ python deploy/demo_client.py --help
 Language | Model Name | Training Data | Hours of Speech
 :-----------: | :------------: | :----------: | -------:
-English | [LibriSpeech Model](http://cloud.dlnel.org/filepub/?uuid=17404caf-cf19-492f-9707-1fad07c19aae) | [LibriSpeech Dataset](http://www.openslr.org/12/) | 960 h
+English | [LibriSpeech Model](http://cloud.dlnel.org/filepub/?uuid=117cde63-cd59-4948-8b80-df782555f7d6) | [LibriSpeech Dataset](http://www.openslr.org/12/) | 960 h
 English | [BaiduEN8k Model](http://cloud.dlnel.org/filepub/?uuid=37a1c211-ec47-494c-973c-31437a10ae90) | Baidu Internal English Dataset | 8628 h
-Mandarin | [Aishell Model](http://cloud.dlnel.org/filepub/?uuid=6c83b9d8-3255-4adf-9726-0fe0be3d0274) | [Aishell Dataset](http://www.openslr.org/33/) | 151 h
+Mandarin | [Aishell Model](http://cloud.dlnel.org/filepub/?uuid=61de63b9-6904-4809-ad95-0cc5104ab973) | [Aishell Dataset](http://www.openslr.org/33/) | 151 h
 Mandarin | [BaiduCN1.2k Model](to-be-added) | Baidu Internal Mandarin Dataset | 1204 h

 #### Language Model Released
@@ -498,13 +505,15 @@ Language Model | Training Data | Token-based | Size | Descriptions
 Test Set | LibriSpeech Model | BaiduEN8K Model
 :--------------------- | ---------------: | -------------------:
-LibriSpeech Test-Clean | 7.77 | 6.63
-LibriSpeech Test-Other | 23.25 | 16.59
-VoxForge American-Canadian | 12.52 |   7.46
-VoxForge Commonwealth | 21.08 | 16.23
-VoxForge European | 31.21 | 20.47
-VoxForge Indian | 56.79 | 28.15
-Baidu Internal Testset  |   47.73 |   8.92
+LibriSpeech Test-Clean | 7.73 | 6.63
+LibriSpeech Test-Other | 23.15 | 16.59
+VoxForge American-Canadian | 12.30 |   7.46
+VoxForge Commonwealth | 20.03 | 16.23
+VoxForge European | 30.31 | 20.47
+VoxForge Indian | 55.47 | 28.15
+Baidu Internal Testset  |   44.71 |   8.92
+
+For reproducing benchmark results on VoxForge data, we provide a script to download data and generate VoxForge dialect manifest files. Please go to ```data/voxforge``` and execute ```sh run_data.sh``` to get VoxForge dialect manifest files. Notice that VoxForge data may keep updating and the generated manifest files may have difference from those we evaluated on.

 #### Benchmark Results for Mandarin Model (Character Error Rate)

@@ -0,0 +1,16 @@
+#! /usr/bin/env bash
+
+# download data, generate manifests
+PYTHONPATH=../../:$PYTHONPATH python voxforge.py \
+--manifest_prefix='./manifest' \
+--target_dir='~/.cache/paddle/dataset/speech/VoxForge' \
+--is_merge_dialect=True \
+--dialects 'american' 'british' 'australian' 'european' 'irish' 'canadian' 'indian'
+
+if [ $? -ne 0 ]; then
+    echo "Prepare VoxForge failed. Terminated."
+    exit 1
+fi
+
+echo "VoxForge Data preparation done."
+exit 0
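
With this script in place, reproducing the VoxForge rows of the benchmark table above comes down to (from the repository root):

```bash
cd data/voxforge
sh run_data.sh   # downloads VoxForge data and generates dialect manifest files
```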

@@ -0,0 +1,221 @@
+"""Prepare VoxForge dataset
+
+Download, unpack and create manifest files.
+Manifest file is a json-format file with each line containing the
+meta data (i.e. audio filepath, transcript and audio duration)
+of each audio file in the data set.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import datetime
+import os
+import codecs
+import soundfile
+import json
+import argparse
+import shutil
+import subprocess
+from data_utils.utility import download_multi, unpack, getfile_insensitive
+
+DATA_HOME = '~/.cache/paddle/dataset/speech'
+
+DATA_URL = 'http://www.repository.voxforge1.org/downloads/SpeechCorpus/Trunk/' \
+           'Audio/Main/16kHz_16bit'
+
+parser = argparse.ArgumentParser(description=__doc__)
+parser.add_argument(
+    "--target_dir",
+    default=DATA_HOME + "/VoxForge",
+    type=str,
+    help="Directory to save the dataset. (default: %(default)s)")
+parser.add_argument(
+    "--dialects",
+    default=[
+        'american', 'british', 'australian', 'european', 'irish', 'canadian',
+        'indian'
+    ],
+    nargs='+',
+    type=str,
+    help="Dialect types. (default: %(default)s)")
+parser.add_argument(
+    "--is_merge_dialect",
+    default=True,
+    type=bool,
+    help="If set True, manifests of american dialect and canadian dialect will "
+    "be merged to american-canadian dialect; manifests of british "
+    "dialect, irish dialect and australian dialect will be merged to "
+    "commonwealth dialect. (default: %(default)s)")
+parser.add_argument(
+    "--manifest_prefix",
+    default="manifest",
+    type=str,
+    help="Filepath prefix for output manifests. (default: %(default)s)")
+args = parser.parse_args()
+
+
+def download_and_unpack(target_dir, url):
+    wget_args = '-q -l 1 -N -nd -c -e robots=off -A tgz -r -np'
+    tgz_dir = os.path.join(target_dir, 'tgz')
+    exit_code = download_multi(url, tgz_dir, wget_args)
+    if exit_code != 0:
+        print('Download tgz audio files failed with exit code %d.' % exit_code)
+    else:
+        print('Download done, start unpacking ...')
+        audio_dir = os.path.join(target_dir, 'audio')
+        for root, dirs, files in os.walk(tgz_dir):
+            for file in files:
+                print(file)
+                if file.endswith('.tgz'):
+                    unpack(os.path.join(root, file), audio_dir)
+
+
+def select_dialects(target_dir, dialect_list):
+    """Classify audio files by dialect."""
+    dialect_root_dir = os.path.join(target_dir, 'dialect')
+    if os.path.exists(dialect_root_dir):
+        shutil.rmtree(dialect_root_dir)
+    os.mkdir(dialect_root_dir)
+    audio_dir = os.path.abspath(os.path.join(target_dir, 'audio'))
+    for dialect in dialect_list:
+        # filter files by dialect
+        command = 'find %s -iwholename "*etc/readme*" -exec egrep -iHl \
+                  "pronunciation dialect.*%s" {} \;' % (audio_dir, dialect)
+        p = subprocess.Popen(
+            command, stdin=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
+        output, err = p.communicate()
+        dialect_dir = os.path.join(dialect_root_dir, dialect)
+        if os.path.exists(dialect_dir):
+            shutil.rmtree(dialect_dir)
+        os.mkdir(dialect_dir)
+        for path in output.splitlines():
+            src_dir = os.path.dirname(os.path.dirname(path))
+            link = os.path.basename(os.path.normpath(src_dir))
+            os.symlink(src_dir, os.path.join(dialect_dir, link))
+
+
+def generate_manifest(data_dir, manifest_path):
+    json_lines = []
+
+    for path in os.listdir(data_dir):
+        audio_link = os.path.join(data_dir, path)
+        assert os.path.islink(
+            audio_link), '%s should be symbolic link.' % audio_link
+        actual_audio_dir = os.path.abspath(os.readlink(audio_link))
+
+        audio_type = ''
+        if os.path.isdir(os.path.join(actual_audio_dir, 'wav')):
+            audio_type = 'wav'
+        elif os.path.isdir(os.path.join(actual_audio_dir, 'flac')):
+            audio_type = 'flac'
+        else:
+            print('Unknown audio type, skipped processing %s.' %
+                  actual_audio_dir)
+            continue
+
+        etc_dir = os.path.join(actual_audio_dir, 'etc')
+        prompts_file = os.path.join(etc_dir, 'PROMPTS')
+        if not os.path.isfile(prompts_file):
+            print('PROMPTS file missing, skip processing %s.' %
+                  actual_audio_dir)
+            continue
+
+        readme_file = getfile_insensitive(os.path.join(etc_dir, 'README'))
+        if readme_file is None:
+            print('README file missing, skip processing %s.' % actual_audio_dir)
+            continue
+
+        for line in file(prompts_file):
+            u, trans = line.strip().split(None, 1)
+            u_parts = u.split('/')
+
+            # try to format the date time
+            try:
+                speaker, date, sfx = u_parts[-3].split('-')
+                obj = datetime.datetime.strptime(date, '%y.%m.%d')
+                formatted = obj.strftime('%Y%m%d')
+                u_parts[-3] = '-'.join([speaker, formatted, sfx])
+            except Exception as e:
+                pass
+
+            if len(u_parts) < 2:
+                u_parts = [audio_type] + u_parts
+            u_parts[-2] = audio_type
+            u_parts[-1] += '.' + audio_type
+            u = os.path.join(actual_audio_dir, '/'.join(u_parts[-2:]))
+
+            if not os.path.isfile(u):
+                print('Audio file missing, skip processing %s.' % u)
+                continue
+
+            if os.stat(u).st_size == 0:
+                print('Empty audio file, skip processing %s.' % u)
+                continue
+
+            trans = trans.strip().replace('-', ' ')
+            if not trans.isupper() or \
+                    not trans.strip().replace(' ', '').replace("'", "").isalpha():
+                print("Transcript not normalized properly, skip processing %s."
+                      % u)
+                continue
+
+            audio_data, samplerate = soundfile.read(u)
+            duration = float(len(audio_data)) / samplerate
+            json_lines.append(
+                json.dumps({
+                    'audio_filepath': u,
+                    'duration': duration,
+                    'text': trans.lower()
+                }))
+
+    with codecs.open(manifest_path, 'w', 'utf-8') as fout:
+        for line in json_lines:
+            fout.write(line + '\n')
+
+
+def merge_manifests(manifest_files, save_path):
+    lines = []
+    for manifest_file in manifest_files:
+        line = codecs.open(manifest_file, 'r', 'utf-8').readlines()
+        lines += line
+
+    with codecs.open(save_path, 'w', 'utf-8') as fout:
+        for line in lines:
+            fout.write(line)
+
+
+def prepare_dataset(url, dialects, target_dir, manifest_prefix, is_merge):
+    download_and_unpack(target_dir, url)
+    select_dialects(target_dir, dialects)
+    american_canadian_manifests = []
+    commonwealth_manifests = []
+    for dialect in dialects:
+        dialect_dir = os.path.join(target_dir, 'dialect', dialect)
+        manifest_fpath = manifest_prefix + '.' + dialect
+        if dialect == 'american' or dialect == 'canadian':
+            american_canadian_manifests.append(manifest_fpath)
+        if dialect == 'australian' \
+                or dialect == 'british' \
+                or dialect == 'irish':
+            commonwealth_manifests.append(manifest_fpath)
+        generate_manifest(dialect_dir, manifest_fpath)
+
+    if is_merge:
+        if len(american_canadian_manifests) > 0:
+            manifest_fpath = manifest_prefix + '.american-canadian'
+            merge_manifests(american_canadian_manifests, manifest_fpath)
+        if len(commonwealth_manifests) > 0:
+            manifest_fpath = manifest_prefix + '.commonwealth'
+            merge_manifests(commonwealth_manifests, manifest_fpath)
+
+
+def main():
+    if args.target_dir.startswith('~'):
+        args.target_dir = os.path.expanduser(args.target_dir)
+
+    prepare_dataset(DATA_URL, args.dialects, args.target_dir,
+                    args.manifest_prefix, args.is_merge_dialect)
+
+
+if __name__ == '__main__':
+    main()
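
For reference, each line `generate_manifest` writes is a standalone JSON object with the three keys assembled above; an illustrative line with hypothetical values:

```python
import json

# One output manifest line (path and duration are made up):
print(json.dumps({
    'audio_filepath': '/abs/path/VoxForge/dialect/american/sample/wav/b0001.wav',
    'duration': 3.52,  # seconds, computed as len(audio_data) / samplerate
    'text': 'the lowercased transcript'
}))
```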

@@ -42,6 +42,25 @@ def read_manifest(manifest_path, max_duration=float('inf'), min_duration=0.0):
     return manifest


+def getfile_insensitive(path):
+    """Get the actual file path when given insensitive filename."""
+    directory, filename = os.path.split(path)
+    directory, filename = (directory or '.'), filename.lower()
+    for f in os.listdir(directory):
+        newpath = os.path.join(directory, f)
+        if os.path.isfile(newpath) and f.lower() == filename:
+            return newpath
+
+
+def download_multi(url, target_dir, extra_args):
+    """Download multiple files from url to target_dir."""
+    if not os.path.exists(target_dir): os.makedirs(target_dir)
+    print("Downloading %s ..." % url)
+    ret_code = os.system("wget -c " + url + ' ' + extra_args + " -P " +
+                         target_dir)
+    return ret_code
+
+
 def download(url, md5sum, target_dir):
     """Download file from url to target_dir, and check md5sum."""
     if not os.path.exists(target_dir): os.makedirs(target_dir)
@@ -52,7 +52,7 @@ PathTrie* PathTrie::get_path_trie(int new_char, bool reset) {
   } else {
     if (has_dictionary_) {
       matcher_->SetState(dictionary_state_);
-      bool found = matcher_->Find(new_char);
+      bool found = matcher_->Find(new_char + 1);
       if (!found) {
         // Adding this character causes word outside dictionary
         auto FSTZERO = fst::TropicalWeight::Zero();

@@ -149,13 +149,15 @@ void Scorer::set_char_map(const std::vector<std::string>& char_list) {
   char_list_ = char_list;
   char_map_.clear();

   // Set the char map for the FST for spelling correction
   for (size_t i = 0; i < char_list_.size(); i++) {
-    if (char_list_[i] == " ") {
-      SPACE_ID_ = i;
-      char_map_[' '] = i;
-    } else if (char_list_[i].size() == 1) {
-      char_map_[char_list_[i][0]] = i;
-    }
+    if (char_list_[i] == " ") {
+      SPACE_ID_ = i;
+    }
+    // The initial state of FST is state 0, hence the index of chars in
+    // the FST should start from 1 to avoid the conflict with the initial
+    // state, otherwise wrong decoding results would be given.
+    char_map_[char_list_[i]] = i + 1;
   }
 }
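
Together with the `matcher_->Find(new_char + 1)` change in path_trie.cpp above, every character now enters the FST with its index shifted by one, so no label collides with the FST's initial state 0 (per the new comment). A toy illustration of the shifted map, not the project's code:

```python
char_list = [' ', 'a', 'b', 'c']

# Shift each index by one so no FST label equals 0, which would
# conflict with the initial state of the FST.
char_map = {ch: i + 1 for i, ch in enumerate(char_list)}

assert 0 not in char_map.values()
assert char_map['a'] == 2  # 'a' is at index 1 in char_list
```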
@@ -193,17 +195,11 @@ std::vector<std::string> Scorer::make_ngram(PathTrie* prefix) {
 void Scorer::fill_dictionary(bool add_space) {
   fst::StdVectorFst dictionary;
-  // First reverse char_list so ints can be accessed by chars
-  std::unordered_map<std::string, int> char_map;
-  for (size_t i = 0; i < char_list_.size(); i++) {
-    char_map[char_list_[i]] = i;
-  }
   // For each unigram convert to ints and put in trie
   int dict_size = 0;
   for (const auto& word : vocabulary_) {
     bool added = add_word_to_dictionary(
-        word, char_map, add_space, SPACE_ID_, &dictionary);
+        word, char_map_, add_space, SPACE_ID_ + 1, &dictionary);
     dict_size += added ? 1 : 0;
   }

@@ -104,7 +104,7 @@ private:
   int SPACE_ID_;
   std::vector<std::string> char_list_;
-  std::unordered_map<char, int> char_map_;
+  std::unordered_map<std::string, int> char_map_;
   std::vector<std::string> vocabulary_;
 };

@@ -113,7 +113,7 @@ decoders_module = [
 setup(
     name='swig_decoders',
-    version='1.0',
+    version='1.1',
     description="""CTC decoders""",
     ext_modules=decoders_module,
     py_modules=['swig_decoders'], )

@@ -147,7 +147,8 @@ def start_server():
         augmentation_config='{}',
         specgram_type=args.specgram_type,
         num_threads=1,
-        keep_transcription_text=True)
+        keep_transcription_text=True,
+        num_conv_layers=args.num_conv_layers)
     # prepare ASR model
     ds2_model = DeepSpeech2Model(
         vocab_size=data_generator.vocab_size,
@@ -163,8 +164,20 @@ def start_server():
     # prepare ASR inference handler
     def file_to_transcript(filename):
         feature = data_generator.process_utterance(filename, "")
+        ins = []
+        conv0_h = (feature[0].shape[0] - 1) // 2 + 1
+        conv0_w = (feature[0].shape[1] - 1) // 3 + 1
+        ins += [feature[0], feature[1],
+                [0], [conv0_w],
+                [1, 32, 1, conv0_h, conv0_w + 1, conv0_w]]
+        pre_h = conv0_h
+        for i in xrange(args.num_conv_layers - 1):
+            h = (pre_h - 1) // 2 + 1
+            pre_h = h
+            ins += [[1, 32, 1, h, conv0_w + 1, conv0_w]]
         result_transcript = ds2_model.infer_batch(
-            infer_data=[feature],
+            infer_data=[ins],
             decoding_method=args.decoding_method,
             beam_alpha=args.alpha,
             beam_beta=args.beta,
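
The `ins` list above packs, alongside the two feature tensors, the post-convolution shape metadata: the first conv layer reduces the feature height with stride 2 and the width with stride 3, and each subsequent layer halves the height again. A standalone sketch of just that arithmetic (function name and sample sizes hypothetical):

```python
def conv_output_shapes(feat_h, feat_w, num_conv_layers):
    """Per-layer output heights and the shared width, as computed above."""
    h = (feat_h - 1) // 2 + 1  # first layer: stride 2 in height
    w = (feat_w - 1) // 3 + 1  # first layer: stride 3 in width
    heights = [h]
    for _ in range(num_conv_layers - 1):
        h = (h - 1) // 2 + 1   # later layers: stride 2 in height only
        heights.append(h)
    return heights, w

print(conv_output_shapes(161, 300, 2))  # ([81, 41], 100)
```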
@@ -173,7 +186,8 @@ def start_server():
             cutoff_top_n=args.cutoff_top_n,
             vocab_list=vocab_list,
             language_model_path=args.lang_model_path,
-            num_processes=1)
+            num_processes=1,
+            feeding_dict=data_generator.feeding)
         return result_transcript[0]

     # warming up with utterrances sampled from Librispeech

@@ -21,11 +21,11 @@ python -u infer.py \
 --num_conv_layers=2 \
 --num_rnn_layers=3 \
 --rnn_layer_size=1024 \
---alpha=1.4 \
---beta=2.4 \
+--alpha=2.6 \
+--beta=5.0 \
 --cutoff_prob=0.99 \
 --cutoff_top_n=40 \
---use_gru=False \
+--use_gru=True \
 --use_gpu=True \
 --share_rnn_weights=False \
 --infer_manifest='data/aishell/manifest.test' \

@@ -30,11 +30,11 @@ python -u infer.py \
 --num_conv_layers=2 \
 --num_rnn_layers=3 \
 --rnn_layer_size=1024 \
---alpha=1.4 \
---beta=2.4 \
+--alpha=2.6 \
+--beta=5.0 \
 --cutoff_prob=0.99 \
 --cutoff_top_n=40 \
---use_gru=False \
+--use_gru=True \
 --use_gpu=True \
 --share_rnn_weights=False \
 --infer_manifest='data/aishell/manifest.test' \

@@ -22,11 +22,11 @@ python -u test.py \
 --num_conv_layers=2 \
 --num_rnn_layers=3 \
 --rnn_layer_size=1024 \
---alpha=1.4 \
---beta=2.4 \
+--alpha=2.6 \
+--beta=5.0 \
 --cutoff_prob=0.99 \
 --cutoff_top_n=40 \
---use_gru=False \
+--use_gru=True \
 --use_gpu=True \
 --share_rnn_weights=False \
 --test_manifest='data/aishell/manifest.test' \

@@ -31,11 +31,11 @@ python -u test.py \
 --num_conv_layers=2 \
 --num_rnn_layers=3 \
 --rnn_layer_size=1024 \
---alpha=1.4 \
---beta=2.4 \
+--alpha=2.6 \
+--beta=5.0 \
 --cutoff_prob=0.99 \
 --cutoff_top_n=40 \
---use_gru=False \
+--use_gru=True \
 --use_gpu=True \
 --share_rnn_weights=False \
 --test_manifest='data/aishell/manifest.test' \

@@ -19,7 +19,7 @@ python -u train.py \
 --min_duration=0.0 \
 --test_off=False \
 --use_sortagrad=True \
---use_gru=False \
+--use_gru=True \
 --use_gpu=True \
 --is_local=True \
 --share_rnn_weights=False \

@@ -5,7 +5,7 @@ cd ../.. > /dev/null

 # download data, generate manifests
 PYTHONPATH=.:$PYTHONPATH python data/librispeech/librispeech.py \
 --manifest_prefix='data/librispeech/manifest' \
---target_dir='~/.cache/paddle/dataset/speech/Libri' \
+--target_dir='~/.cache/paddle/dataset/speech/libri' \
 --full_download='True'

 if [ $? -ne 0 ]; then

@@ -2,8 +2,8 @@
 . ../../utils/utility.sh

-URL='http://cloud.dlnel.org/filepub/?uuid=6c83b9d8-3255-4adf-9726-0fe0be3d0274'
-MD5=28521a58552885a81cf92a1e9b133a71
+URL='http://cloud.dlnel.org/filepub/?uuid=61de63b9-6904-4809-ad95-0cc5104ab973'
+MD5=0ee83aa15fba421e5de8fc66c8feb350
 TARGET=./aishell_model.tar.gz

@@ -2,8 +2,8 @@
 . ../../utils/utility.sh

-URL='http://cloud.dlnel.org/filepub/?uuid=6020a634-5399-4423-b021-c5ed32680fff'
-MD5=2ef08f8b608a7c555592161fc14d81a6
+URL='http://cloud.dlnel.org/filepub/?uuid=117cde63-cd59-4948-8b80-df782555f7d6'
+MD5=1f72d0c5591f453362f0caa09dd57618
 TARGET=./librispeech_model.tar.gz

@@ -27,7 +27,7 @@ if [ $? != 0 ]; then
 fi

 # install decoders
-python -c "import swig_decoders"
+python -c "import pkg_resources; pkg_resources.require(\"swig_decoders==1.1\")"
 if [ $? != 0 ]; then
     cd decoders/swig > /dev/null
     sh setup.sh
