parent
49d55a865c
commit
45f73c507c
@ -1,5 +1,4 @@
|
||||
.DS_Store
|
||||
*.pyc
|
||||
tools/venv
|
||||
dataset
|
||||
models/*
|
||||
.vscode
|
||||
|
@ -0,0 +1,13 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
@ -0,0 +1,13 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
@ -0,0 +1,15 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from deepspeech.training.trainer import *
|
@ -0,0 +1,13 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
@ -0,0 +1,57 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Contains common utility functions."""
|
||||
|
||||
import distutils.util
|
||||
|
||||
|
||||
def print_arguments(args):
    """Print argparse's arguments, one ``name: value`` pair per line.

    Usage:

    .. code-block:: python

        parser = argparse.ArgumentParser()
        parser.add_argument("name", default="Jonh", type=str, help="User name.")
        args = parser.parse_args()
        print_arguments(args)

    :param args: Input argparse.Namespace for printing.
    :type args: argparse.Namespace
    """
    # Assemble the full report first, then emit it line by line.
    # vars() exposes the namespace as a dict; sorting keeps output stable.
    lines = ["----------- Configuration Arguments -----------"]
    lines.extend("%s: %s" % (key, val)
                 for key, val in sorted(vars(args).items()))
    lines.append("------------------------------------------------")
    for line in lines:
        print(line)
|
||||
|
||||
|
||||
def _strtobool(value):
    """Map a truthy/falsy string to 1/0 (drop-in for distutils.util.strtobool)."""
    value = value.lower()
    if value in ('y', 'yes', 't', 'true', 'on', '1'):
        return 1
    if value in ('n', 'no', 'f', 'false', 'off', '0'):
        return 0
    raise ValueError("invalid truth value %r" % (value,))


def add_arguments(argname, type, default, help, argparser, **kwargs):
    """Add argparse's argument, registered as ``--argname``.

    Usage:

    .. code-block:: python

        parser = argparse.ArgumentParser()
        add_argument("name", str, "Jonh", "User name.", parser)
        args = parser.parse_args()

    :param argname: Option name (without the leading ``--``).
    :param type: Expected Python type. ``bool`` is parsed leniently
        ("yes"/"no", "true"/"false", "1"/"0", ...) because ``bool("False")``
        would be truthy.
    :param default: Default value; echoed into the help text.
    :param help: Help text; " Default: %(default)s." is appended.
    :param argparser: Target argparse.ArgumentParser instance.
    """
    # NOTE: distutils was deprecated (PEP 632) and removed in Python 3.12,
    # so the lenient bool parser is inlined above instead of relying on
    # distutils.util.strtobool (same accepted values, same 1/0 result).
    type = _strtobool if type == bool else type
    argparser.add_argument(
        "--" + argname,
        default=default,
        type=type,
        help=help + ' Default: %(default)s.',
        **kwargs)
|
@ -0,0 +1,2 @@
|
||||
data
|
||||
ckpt*
|
@ -1,10 +1,13 @@
|
||||
#! /usr/bin/env bash
|
||||
|
||||
. ../../utils/utility.sh
|
||||
. ${MAIN_ROOT}/utils/utility.sh
|
||||
|
||||
DIR=data/lm
|
||||
mkdir -p ${DIR}
|
||||
|
||||
URL='https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm'
|
||||
MD5="29e02312deb2e59b3c8686c7966d4fe3"
|
||||
TARGET=./zh_giga.no_cna_cmn.prune01244.klm
|
||||
TARGET=${DIR}/zh_giga.no_cna_cmn.prune01244.klm
|
||||
|
||||
|
||||
echo "Download language model ..."
|
@ -1 +0,0 @@
|
||||
../../models
|
@ -1,21 +1,16 @@
|
||||
#!/bin/bash
|
||||
|
||||
source path.sh
|
||||
# only demos
|
||||
|
||||
# prepare data
|
||||
bash ./local/data.sh
|
||||
|
||||
# test pretrain model
|
||||
bash ./local/test_golden.sh
|
||||
|
||||
# infer with pretrained model
|
||||
bash ./local/infer_golden.sh
|
||||
|
||||
# train model
|
||||
bash ./local/train.sh
|
||||
CUDA_VISIBLE_DEVICES=0,1,2,3 bash ./local/train.sh
|
||||
|
||||
# test model
|
||||
bash ./local/test.sh
|
||||
CUDA_VISIBLE_DEVICES=0 bash ./local/test.sh ckpt/checkpoints/step-3284
|
||||
|
||||
# infer model
|
||||
bash ./local/infer.sh
|
||||
CUDA_VISIBLE_DEVICES=0 bash ./local/infer.sh ckpt/checkpoints/step-3284
|
||||
|
@ -1,11 +1,13 @@
|
||||
#! /usr/bin/env bash
|
||||
|
||||
. ../../utils/utility.sh
|
||||
. ${MAIN_ROOT}/utils/utility.sh
|
||||
|
||||
DIR=data/lm
|
||||
mkdir -p ${DIR}
|
||||
|
||||
URL=https://deepspeech.bj.bcebos.com/en_lm/common_crawl_00.prune01111.trie.klm
|
||||
MD5="099a601759d467cd0a8523ff939819c5"
|
||||
TARGET=./common_crawl_00.prune01111.trie.klm
|
||||
|
||||
TARGET=${DIR}/common_crawl_00.prune01111.trie.klm
|
||||
|
||||
echo "Download language model ..."
|
||||
download $URL $MD5 $TARGET
|
@ -1,10 +1,13 @@
|
||||
#! /usr/bin/env bash
|
||||
|
||||
. ../../utils/utility.sh
|
||||
. ${MAIN_ROOT}/utils/utility.sh
|
||||
|
||||
DIR=data/pretrain
|
||||
mkdir -p ${DIR}
|
||||
|
||||
URL='https://deepspeech.bj.bcebos.com/demo_models/baidu_en8k_model_fluid.tar.gz'
|
||||
MD5=7e58fbf64aa4ecf639b049792ddcf788
|
||||
TARGET=./baidu_en8k_model_fluid.tar.gz
|
||||
TARGET=${DIR}/baidu_en8k_model_fluid.tar.gz
|
||||
|
||||
|
||||
echo "Download BaiduEn8k model ..."
|
@ -0,0 +1 @@
|
||||
data_aishell*
|
@ -0,0 +1,7 @@
|
||||
dev-clean/
|
||||
dev-other/
|
||||
test-clean/
|
||||
test-other/
|
||||
train-clean-100/
|
||||
train-clean-360/
|
||||
train-other-500/
|
@ -0,0 +1,4 @@
|
||||
dev-clean/
|
||||
manifest.dev-clean
|
||||
manifest.train-clean
|
||||
train-clean/
|
@ -0,0 +1,115 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Prepare Librispeech ASR datasets.
|
||||
|
||||
Download, unpack and create manifest files.
|
||||
Manifest file is a json-format file with each line containing the
|
||||
meta data (i.e. audio filepath, transcript and audio duration)
|
||||
of each audio file in the data set.
|
||||
"""
|
||||
|
||||
import distutils.util
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
import soundfile
|
||||
import json
|
||||
import codecs
|
||||
import io
|
||||
from utils.utility import download, unpack
|
||||
|
||||
URL_ROOT = "http://www.openslr.org/resources/31"
|
||||
URL_TRAIN_CLEAN = URL_ROOT + "/train-clean-5.tar.gz"
|
||||
URL_DEV_CLEAN = URL_ROOT + "/dev-clean-2.tar.gz"
|
||||
|
||||
MD5_TRAIN_CLEAN = "5df7d4e78065366204ca6845bb08f490"
|
||||
MD5_DEV_CLEAN = "6d7ab67ac6a1d2c993d050e16d61080d"
|
||||
|
||||
parser = argparse.ArgumentParser(description=__doc__)
|
||||
parser.add_argument(
|
||||
"--target_dir",
|
||||
default='~/.cache/paddle/dataset/speech/libri',
|
||||
type=str,
|
||||
help="Directory to save the dataset. (default: %(default)s)")
|
||||
parser.add_argument(
|
||||
"--manifest_prefix",
|
||||
default="manifest",
|
||||
type=str,
|
||||
help="Filepath prefix for output manifests. (default: %(default)s)")
|
||||
args = parser.parse_args()
|
||||
|
||||
|
||||
def create_manifest(data_dir, manifest_path):
    """Create a manifest json file summarizing the data set, with each line
    containing the meta data (i.e. audio filepath, transcription text, audio
    duration) of each audio file within the data set.
    """
    print("Creating manifest %s ..." % manifest_path)
    entries = []
    for subfolder, _, filelist in sorted(os.walk(data_dir)):
        # LibriSpeech keeps one '*trans.txt' per leaf directory; skip
        # directories without one.
        trans_files = [name for name in filelist if name.endswith('trans.txt')]
        if not trans_files:
            continue
        trans_path = os.path.join(subfolder, trans_files[0])
        for line in io.open(trans_path, encoding="utf8"):
            segments = line.strip().split()
            # first token is the utterance id, the rest is the transcript
            transcript = ' '.join(segments[1:]).lower()
            audio_path = os.path.join(subfolder, segments[0] + '.flac')
            samples, rate = soundfile.read(audio_path)
            entries.append(
                json.dumps({
                    'audio_filepath': audio_path,
                    'duration': float(len(samples)) / rate,
                    'text': transcript
                }))
    with codecs.open(manifest_path, 'w', 'utf-8') as out_file:
        for entry in entries:
            out_file.write(entry + '\n')
|
||||
|
||||
|
||||
def prepare_dataset(url, md5sum, target_dir, manifest_path):
    """Download and unpack the archive if needed, then build its manifest."""
    # Presence of the unpacked 'LibriSpeech' directory marks prior downloads.
    unpacked_marker = os.path.join(target_dir, "LibriSpeech")
    if os.path.exists(unpacked_marker):
        print("Skip downloading and unpacking. Data already exists in %s." %
              target_dir)
    else:
        # fetch the tarball (with md5 verification) and expand it in place
        archive = download(url, md5sum, target_dir)
        unpack(archive, target_dir)
    # always (re)generate the manifest json file
    create_manifest(target_dir, manifest_path)
|
||||
|
||||
|
||||
def main():
    """Entry point: fetch both mini-LibriSpeech splits and write manifests."""
    if args.target_dir.startswith('~'):
        args.target_dir = os.path.expanduser(args.target_dir)

    # (url, md5, split-name) for each split; the split name doubles as the
    # sub-directory and the manifest filename suffix.
    splits = (
        (URL_TRAIN_CLEAN, MD5_TRAIN_CLEAN, "train-clean"),
        (URL_DEV_CLEAN, MD5_DEV_CLEAN, "dev-clean"),
    )
    for url, md5sum, split in splits:
        prepare_dataset(
            url=url,
            md5sum=md5sum,
            target_dir=os.path.join(args.target_dir, split),
            manifest_path=args.manifest_prefix + "." + split)


if __name__ == '__main__':
    main()
|
@ -0,0 +1,123 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Prepare Aishell mandarin dataset
|
||||
|
||||
Download, unpack and create manifest files.
|
||||
Manifest file is a json-format file with each line containing the
|
||||
meta data (i.e. audio filepath, transcript and audio duration)
|
||||
of each audio file in the data set.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import codecs
|
||||
import soundfile
|
||||
import json
|
||||
import argparse
|
||||
from utils.utility import download, unpack
|
||||
|
||||
DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')
|
||||
|
||||
URL_ROOT = 'https://www.openslr.org/resources/17'
|
||||
DATA_URL = URL_ROOT + '/musan.tar.gz'
|
||||
MD5_DATA = ''
|
||||
|
||||
parser = argparse.ArgumentParser(description=__doc__)
|
||||
parser.add_argument(
|
||||
"--target_dir",
|
||||
default=DATA_HOME + "/musan",
|
||||
type=str,
|
||||
help="Directory to save the dataset. (default: %(default)s)")
|
||||
parser.add_argument(
|
||||
"--manifest_prefix",
|
||||
default="manifest",
|
||||
type=str,
|
||||
help="Filepath prefix for output manifests. (default: %(default)s)")
|
||||
args = parser.parse_args()
|
||||
|
||||
|
||||
def create_manifest(data_dir, manifest_path_prefix):
    """Walk an Aishell-layout corpus and write one manifest per data split.

    For each of the 'train'/'dev'/'test' splits, writes
    ``<manifest_path_prefix>.<split>``: a json-lines file in which every line
    carries the audio filepath, duration (seconds) and transcript of one
    utterance found under ``<data_dir>/wav/<split>``.
    """
    print("Creating manifest %s ..." % manifest_path_prefix)
    # Transcript file maps utterance id -> text.
    transcript_path = os.path.join(data_dir, 'transcript',
                                   'aishell_transcript_v0.8.txt')
    transcript_dict = {}
    for line in codecs.open(transcript_path, 'r', 'utf-8'):
        line = line.strip()
        if line == '':
            continue
        audio_id, text = line.split(' ', 1)
        # remove whitespace inside the transcript text
        transcript_dict[audio_id] = ''.join(text.split())

    # 'dtype' instead of 'type': the original shadowed the builtin.
    for dtype in ['train', 'dev', 'test']:
        # Fresh list per split (the original reused one list via
        # `del json_lines[:]`, which is easy to get wrong).
        json_lines = []
        audio_dir = os.path.join(data_dir, 'wav', dtype)
        for subfolder, _, filelist in sorted(os.walk(audio_dir)):
            for fname in filelist:
                audio_path = os.path.join(subfolder, fname)
                audio_id = fname[:-4]  # strip the file extension
                # skip audio that has no transcription
                if audio_id not in transcript_dict:
                    continue
                audio_data, samplerate = soundfile.read(audio_path)
                duration = float(len(audio_data) / samplerate)
                json_lines.append(
                    json.dumps(
                        {
                            'audio_filepath': audio_path,
                            'duration': duration,
                            'text': transcript_dict[audio_id]
                        },
                        ensure_ascii=False))
        manifest_path = manifest_path_prefix + '.' + dtype
        with codecs.open(manifest_path, 'w', 'utf-8') as fout:
            for line in json_lines:
                fout.write(line + '\n')
|
||||
|
||||
|
||||
def prepare_dataset(url, md5sum, target_dir, manifest_path):
    """Download, unpack and create manifest file."""
    data_dir = os.path.join(target_dir, 'data_aishell')
    if os.path.exists(data_dir):
        print("Skip downloading and unpacking. Data already exists in %s." %
              target_dir)
    else:
        archive = download(url, md5sum, target_dir)
        unpack(archive, target_dir)
        # expand every nested tar archive found under the wav/ tree
        wav_root = os.path.join(data_dir, 'wav')
        for subfolder, _, filelist in sorted(os.walk(wav_root)):
            for tarball in filelist:
                unpack(os.path.join(subfolder, tarball), subfolder, True)
    create_manifest(data_dir, manifest_path)
|
||||
|
||||
|
||||
def main():
    """Entry point: resolve the target directory, then fetch and manifest."""
    target = args.target_dir
    if target.startswith('~'):
        # keep args.target_dir in sync with the expanded path
        target = os.path.expanduser(target)
        args.target_dir = target

    prepare_dataset(
        url=DATA_URL,
        md5sum=MD5_DATA,
        target_dir=target,
        manifest_path=args.manifest_prefix)


if __name__ == '__main__':
    main()
|
@ -0,0 +1,123 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Prepare Aishell mandarin dataset
|
||||
|
||||
Download, unpack and create manifest files.
|
||||
Manifest file is a json-format file with each line containing the
|
||||
meta data (i.e. audio filepath, transcript and audio duration)
|
||||
of each audio file in the data set.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import codecs
|
||||
import soundfile
|
||||
import json
|
||||
import argparse
|
||||
from data_utils.utility import download, unpack
|
||||
|
||||
DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')
|
||||
|
||||
URL_ROOT = 'http://www.openslr.org/resources/28'
|
||||
DATA_URL = URL_ROOT + '/rirs_noises.zip'
|
||||
MD5_DATA = ''
|
||||
|
||||
parser = argparse.ArgumentParser(description=__doc__)
|
||||
parser.add_argument(
|
||||
"--target_dir",
|
||||
default=DATA_HOME + "/Aishell",
|
||||
type=str,
|
||||
help="Directory to save the dataset. (default: %(default)s)")
|
||||
parser.add_argument(
|
||||
"--manifest_prefix",
|
||||
default="manifest",
|
||||
type=str,
|
||||
help="Filepath prefix for output manifests. (default: %(default)s)")
|
||||
args = parser.parse_args()
|
||||
|
||||
|
||||
def create_manifest(data_dir, manifest_path_prefix):
    """Write one json-lines manifest per data split of an Aishell-layout corpus.

    For each split in 'train'/'dev'/'test', emits
    ``<manifest_path_prefix>.<split>`` where every line records the audio
    filepath, duration (seconds) and transcript of one utterance found under
    ``<data_dir>/wav/<split>``.
    """
    print("Creating manifest %s ..." % manifest_path_prefix)
    # Build the utterance-id -> transcript lookup table first.
    transcript_path = os.path.join(data_dir, 'transcript',
                                   'aishell_transcript_v0.8.txt')
    transcript_dict = {}
    for line in codecs.open(transcript_path, 'r', 'utf-8'):
        line = line.strip()
        if line == '':
            continue
        audio_id, text = line.split(' ', 1)
        # remove whitespace inside the transcript text
        transcript_dict[audio_id] = ''.join(text.split())

    # 'split' instead of 'type': the original shadowed the builtin.
    for split in ['train', 'dev', 'test']:
        # Fresh list per split (the original reused one list via
        # `del json_lines[:]`).
        json_lines = []
        audio_dir = os.path.join(data_dir, 'wav', split)
        for subfolder, _, filelist in sorted(os.walk(audio_dir)):
            for fname in filelist:
                audio_path = os.path.join(subfolder, fname)
                audio_id = fname[:-4]  # strip the file extension
                # skip audio that has no transcription
                if audio_id not in transcript_dict:
                    continue
                audio_data, samplerate = soundfile.read(audio_path)
                duration = float(len(audio_data) / samplerate)
                json_lines.append(
                    json.dumps(
                        {
                            'audio_filepath': audio_path,
                            'duration': duration,
                            'text': transcript_dict[audio_id]
                        },
                        ensure_ascii=False))
        manifest_path = manifest_path_prefix + '.' + split
        with codecs.open(manifest_path, 'w', 'utf-8') as fout:
            for line in json_lines:
                fout.write(line + '\n')
|
||||
|
||||
|
||||
def prepare_dataset(url, md5sum, target_dir, manifest_path):
    """Download the archive, unpack it (plus nested audio tars), build manifest."""
    data_dir = os.path.join(target_dir, 'data_aishell')
    already_unpacked = os.path.exists(data_dir)
    if not already_unpacked:
        unpack(download(url, md5sum, target_dir), target_dir)
        # expand every nested tar archive found under the wav/ tree
        for subfolder, _, filelist in sorted(
                os.walk(os.path.join(data_dir, 'wav'))):
            for ftar in filelist:
                unpack(os.path.join(subfolder, ftar), subfolder, True)
    else:
        print("Skip downloading and unpacking. Data already exists in %s." %
              target_dir)
    create_manifest(data_dir, manifest_path)
|
||||
|
||||
|
||||
def main():
    """Entry point: expand '~' in the target path, then run the pipeline."""
    target_dir = args.target_dir
    if target_dir.startswith('~'):
        target_dir = os.path.expanduser(target_dir)
        args.target_dir = target_dir

    prepare_dataset(
        url=DATA_URL,
        md5sum=MD5_DATA,
        target_dir=target_dir,
        manifest_path=args.manifest_prefix)


if __name__ == '__main__':
    main()
|
@ -1,9 +1,12 @@
|
||||
#! /usr/bin/env bash
|
||||
|
||||
TARGET_DIR=${MAIN_ROOT}/examples/dataset/voxforge
|
||||
mkdir -p ${TARGET_DIR}
|
||||
|
||||
# download data, generate manifests
|
||||
PYTHONPATH=../../:$PYTHONPATH python voxforge.py \
|
||||
--manifest_prefix='./manifest' \
|
||||
--target_dir='./dataset/VoxForge' \
|
||||
python ${MAIN_ROOT}/examples/dataset/voxforge/voxforge.py \
|
||||
--manifest_prefix="${TARGET_DIR}/manifest" \
|
||||
--target_dir="${TARGET_DIR}" \
|
||||
--is_merge_dialect=True \
|
||||
--dialects 'american' 'british' 'australian' 'european' 'irish' 'canadian' 'indian'
|
||||
|
@ -0,0 +1,2 @@
|
||||
data
|
||||
ckpt*
|
@ -0,0 +1,20 @@
|
||||
#! /usr/bin/env bash
|
||||
|
||||
. ${MAIN_ROOT}/utils/utility.sh
|
||||
|
||||
DIR=data/lm
|
||||
mkdir -p ${DIR}
|
||||
|
||||
URL=https://deepspeech.bj.bcebos.com/en_lm/common_crawl_00.prune01111.trie.klm
|
||||
MD5="099a601759d467cd0a8523ff939819c5"
|
||||
TARGET=${DIR}/common_crawl_00.prune01111.trie.klm
|
||||
|
||||
echo "Download language model ..."
|
||||
download $URL $MD5 $TARGET
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "Fail to download the language model!"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
||||
exit 0
|
@ -1,43 +1,21 @@
|
||||
#! /usr/bin/env bash
|
||||
|
||||
# download language model
|
||||
cd ${MAIN_ROOT}/models/lm > /dev/null
|
||||
bash download_lm_en.sh
|
||||
bash local/download_lm_en.sh
|
||||
if [ $? -ne 0 ]; then
|
||||
exit 1
|
||||
fi
|
||||
cd - > /dev/null
|
||||
|
||||
python3 -u ${BIN_DIR}/infer.py \
|
||||
--device 'gpu' \
|
||||
--nproc 1 \
|
||||
--config conf/deepspeech2.yaml \
|
||||
--output ckpt
|
||||
|
||||
# infer
|
||||
CUDA_VISIBLE_DEVICES=0 \
|
||||
python3 -u ${MAIN_ROOT}/infer.py \
|
||||
--num_samples=10 \
|
||||
--beam_size=500 \
|
||||
--num_proc_bsearch=8 \
|
||||
--num_conv_layers=2 \
|
||||
--num_rnn_layers=3 \
|
||||
--rnn_layer_size=2048 \
|
||||
--alpha=2.5 \
|
||||
--beta=0.3 \
|
||||
--cutoff_prob=1.0 \
|
||||
--cutoff_top_n=40 \
|
||||
--use_gru=False \
|
||||
--use_gpu=True \
|
||||
--share_rnn_weights=True \
|
||||
--infer_manifest="data/manifest.test-clean" \
|
||||
--mean_std_path="data/mean_std.npz" \
|
||||
--vocab_path="data/vocab.txt" \
|
||||
--model_path="checkpoints/step_final" \
|
||||
--lang_model_path="${MAIN_ROOT}/models/lm/common_crawl_00.prune01111.trie.klm" \
|
||||
--decoding_method="ctc_beam_search" \
|
||||
--error_rate_type="wer" \
|
||||
--specgram_type="linear"
|
||||
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "Failed in inference!"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
||||
exit 0
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue