parent
49d55a865c
commit
45f73c507c
@ -1,5 +1,4 @@
|
|||||||
.DS_Store
|
.DS_Store
|
||||||
*.pyc
|
*.pyc
|
||||||
tools/venv
|
tools/venv
|
||||||
dataset
|
.vscode
|
||||||
models/*
|
|
||||||
|
@ -0,0 +1,13 @@
|
|||||||
|
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
@ -0,0 +1,13 @@
|
|||||||
|
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
@ -0,0 +1,15 @@
|
|||||||
|
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from deepspeech.training.trainer import *
|
@ -0,0 +1,13 @@
|
|||||||
|
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
@ -0,0 +1,57 @@
|
|||||||
|
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
"""Contains common utility functions."""
|
||||||
|
|
||||||
|
import distutils.util
|
||||||
|
|
||||||
|
|
||||||
|
def print_arguments(args):
    """Print every attribute of an argparse namespace, sorted by name.

    Usage:

    .. code-block:: python

        parser = argparse.ArgumentParser()
        parser.add_argument("name", default="John", type=str, help="User name.")
        args = parser.parse_args()
        print_arguments(args)

    :param args: Parsed arguments whose attributes are printed as
                 ``name: value`` lines between a header and a footer rule.
    :type args: argparse.Namespace
    """
    settings = vars(args)
    print("----------- Configuration Arguments -----------")
    # Attribute names are unique, so ordering by name alone matches
    # ordering the (name, value) pairs.
    for key in sorted(settings):
        print("%s: %s" % (key, settings[key]))
    print("------------------------------------------------")
|
||||||
|
|
||||||
|
|
||||||
|
def _strtobool(value):
    """Convert a truth-value string to 1 or 0.

    Local stand-in for ``distutils.util.strtobool``, which is no longer
    available since distutils was removed from the standard library in
    Python 3.12. Accepted spellings and return values mirror the original.

    :param value: Text such as ``"yes"``, ``"True"``, ``"0"`` or ``"off"``.
    :type value: str
    :return: 1 for a true spelling, 0 for a false spelling.
    :rtype: int
    :raises ValueError: If ``value`` is not a recognised truth value
                        (argparse reports this as a usage error).
    """
    lowered = value.lower()
    if lowered in ("y", "yes", "t", "true", "on", "1"):
        return 1
    if lowered in ("n", "no", "f", "false", "off", "0"):
        return 0
    raise ValueError("invalid truth value %r" % (value,))


def add_arguments(argname, type, default, help, argparser, **kwargs):
    """Register one ``--<argname>`` option on an argparse parser.

    Usage:

    .. code-block:: python

        parser = argparse.ArgumentParser()
        add_arguments("name", str, "John", "User name.", parser)
        args = parser.parse_args()

    :param argname: Option name without the leading ``--``.
    :type argname: str
    :param type: Callable used to convert the command-line string. ``bool``
                 is special-cased so that ``--flag False`` parses as false
                 instead of the truthy non-empty string ``"False"``.
    :param default: Value used when the option is not given.
    :param help: Help text; the default value is appended automatically.
    :type help: str
    :param argparser: Parser (or argument group) to add the option to.
    :param kwargs: Extra keyword arguments forwarded to ``add_argument``.
    """
    converter = _strtobool if type is bool else type
    argparser.add_argument(
        "--" + argname,
        default=default,
        type=converter,
        # argparse expands %(default)s when rendering the help text.
        help=help + ' Default: %(default)s.',
        **kwargs)
|
@ -0,0 +1,2 @@
|
|||||||
|
data
|
||||||
|
ckpt*
|
@ -1,10 +1,13 @@
|
|||||||
#! /usr/bin/env bash
|
#! /usr/bin/env bash
|
||||||
|
|
||||||
. ../../utils/utility.sh
|
. ${MAIN_ROOT}/utils/utility.sh
|
||||||
|
|
||||||
|
DIR=data/lm
|
||||||
|
mkdir -p ${DIR}
|
||||||
|
|
||||||
URL='https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm'
|
URL='https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm'
|
||||||
MD5="29e02312deb2e59b3c8686c7966d4fe3"
|
MD5="29e02312deb2e59b3c8686c7966d4fe3"
|
||||||
TARGET=./zh_giga.no_cna_cmn.prune01244.klm
|
TARGET=${DIR}/zh_giga.no_cna_cmn.prune01244.klm
|
||||||
|
|
||||||
|
|
||||||
echo "Download language model ..."
|
echo "Download language model ..."
|
@ -1 +0,0 @@
|
|||||||
../../models
|
|
@ -1,21 +1,16 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
source path.sh
|
source path.sh
|
||||||
|
# only demos
|
||||||
|
|
||||||
# prepare data
|
# prepare data
|
||||||
bash ./local/data.sh
|
bash ./local/data.sh
|
||||||
|
|
||||||
# test pretrain model
|
|
||||||
bash ./local/test_golden.sh
|
|
||||||
|
|
||||||
# test pretain model
|
|
||||||
bash ./local/infer_golden.sh
|
|
||||||
|
|
||||||
# train model
|
# train model
|
||||||
bash ./local/train.sh
|
CUDA_VISIBLE_DEVICES=0,1,2,3 bash ./local/train.sh
|
||||||
|
|
||||||
# test model
|
# test model
|
||||||
bash ./local/test.sh
|
CUDA_VISIBLE_DEVICES=0 bash ./local/test.sh ckpt/checkpoints/step-3284
|
||||||
|
|
||||||
# infer model
|
# infer model
|
||||||
bash ./local/infer.sh
|
CUDA_VISIBLE_DEVICES=0 bash ./local/infer.sh ckpt/checkpoints/step-3284
|
||||||
|
@ -1,11 +1,13 @@
|
|||||||
#! /usr/bin/env bash
|
#! /usr/bin/env bash
|
||||||
|
|
||||||
. ../../utils/utility.sh
|
. ${MAIN_ROOT}/utils/utility.sh
|
||||||
|
|
||||||
|
DIR=data/lm
|
||||||
|
mkdir -p ${DIR}
|
||||||
|
|
||||||
URL=https://deepspeech.bj.bcebos.com/en_lm/common_crawl_00.prune01111.trie.klm
|
URL=https://deepspeech.bj.bcebos.com/en_lm/common_crawl_00.prune01111.trie.klm
|
||||||
MD5="099a601759d467cd0a8523ff939819c5"
|
MD5="099a601759d467cd0a8523ff939819c5"
|
||||||
TARGET=./common_crawl_00.prune01111.trie.klm
|
TARGET=${DIR}/common_crawl_00.prune01111.trie.klm
|
||||||
|
|
||||||
|
|
||||||
echo "Download language model ..."
|
echo "Download language model ..."
|
||||||
download $URL $MD5 $TARGET
|
download $URL $MD5 $TARGET
|
@ -1,10 +1,13 @@
|
|||||||
#! /usr/bin/env bash
|
#! /usr/bin/env bash
|
||||||
|
|
||||||
. ../../utils/utility.sh
|
. ${MAIN_ROOT}/utils/utility.sh
|
||||||
|
|
||||||
|
DIR=data/pretrain
|
||||||
|
mkdir -p ${DIR}
|
||||||
|
|
||||||
URL='https://deepspeech.bj.bcebos.com/demo_models/baidu_en8k_model_fluid.tar.gz'
|
URL='https://deepspeech.bj.bcebos.com/demo_models/baidu_en8k_model_fluid.tar.gz'
|
||||||
MD5=7e58fbf64aa4ecf639b049792ddcf788
|
MD5=7e58fbf64aa4ecf639b049792ddcf788
|
||||||
TARGET=./baidu_en8k_model_fluid.tar.gz
|
TARGET=${DIR}/baidu_en8k_model_fluid.tar.gz
|
||||||
|
|
||||||
|
|
||||||
echo "Download BaiduEn8k model ..."
|
echo "Download BaiduEn8k model ..."
|
@ -0,0 +1 @@
|
|||||||
|
data_aishell*
|
@ -0,0 +1,7 @@
|
|||||||
|
dev-clean/
|
||||||
|
dev-other/
|
||||||
|
test-clean/
|
||||||
|
test-other/
|
||||||
|
train-clean-100/
|
||||||
|
train-clean-360/
|
||||||
|
train-other-500/
|
@ -0,0 +1,4 @@
|
|||||||
|
dev-clean/
|
||||||
|
manifest.dev-clean
|
||||||
|
manifest.train-clean
|
||||||
|
train-clean/
|
@ -0,0 +1,115 @@
|
|||||||
|
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
"""Prepare Librispeech ASR datasets.
|
||||||
|
|
||||||
|
Download, unpack and create manifest files.
|
||||||
|
Manifest file is a json-format file with each line containing the
|
||||||
|
meta data (i.e. audio filepath, transcript and audio duration)
|
||||||
|
of each audio file in the data set.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import distutils.util
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import argparse
|
||||||
|
import soundfile
|
||||||
|
import json
|
||||||
|
import codecs
|
||||||
|
import io
|
||||||
|
from utils.utility import download, unpack
|
||||||
|
|
||||||
|
URL_ROOT = "http://www.openslr.org/resources/31"
|
||||||
|
URL_TRAIN_CLEAN = URL_ROOT + "/train-clean-5.tar.gz"
|
||||||
|
URL_DEV_CLEAN = URL_ROOT + "/dev-clean-2.tar.gz"
|
||||||
|
|
||||||
|
MD5_TRAIN_CLEAN = "5df7d4e78065366204ca6845bb08f490"
|
||||||
|
MD5_DEV_CLEAN = "6d7ab67ac6a1d2c993d050e16d61080d"
|
||||||
|
|
||||||
|
parser = argparse.ArgumentParser(description=__doc__)
|
||||||
|
parser.add_argument(
|
||||||
|
"--target_dir",
|
||||||
|
default='~/.cache/paddle/dataset/speech/libri',
|
||||||
|
type=str,
|
||||||
|
help="Directory to save the dataset. (default: %(default)s)")
|
||||||
|
parser.add_argument(
|
||||||
|
"--manifest_prefix",
|
||||||
|
default="manifest",
|
||||||
|
type=str,
|
||||||
|
help="Filepath prefix for output manifests. (default: %(default)s)")
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
def create_manifest(data_dir, manifest_path):
    """Create a manifest file summarizing the audio found under ``data_dir``.

    Every directory below ``data_dir`` that contains a file ending in
    ``trans.txt`` is treated as a chapter folder: each non-blank line of that
    file is ``<utterance-id> <transcript words...>`` and the audio is read
    from ``<utterance-id>.flac`` in the same folder. One JSON object per
    utterance (audio filepath, duration in seconds, lower-cased text) is
    written per line to ``manifest_path``.

    :param data_dir: Root directory to scan recursively.
    :type data_dir: str
    :param manifest_path: Output file path; overwritten if it exists.
    :type manifest_path: str
    """
    print("Creating manifest %s ..." % manifest_path)
    json_lines = []
    for subfolder, _, filelist in sorted(os.walk(data_dir)):
        text_filelist = [
            filename for filename in filelist if filename.endswith('trans.txt')
        ]
        if not text_filelist:
            continue
        text_filepath = os.path.join(subfolder, text_filelist[0])
        # Use a context manager so the transcript file is closed promptly
        # instead of relying on garbage collection.
        with io.open(text_filepath, encoding="utf8") as text_file:
            for line in text_file:
                segments = line.strip().split()
                if not segments:
                    # Blank line: no utterance id to look up.
                    continue
                text = ' '.join(segments[1:]).lower()
                audio_filepath = os.path.join(subfolder, segments[0] + '.flac')
                audio_data, samplerate = soundfile.read(audio_filepath)
                duration = float(len(audio_data)) / samplerate
                json_lines.append(
                    json.dumps({
                        'audio_filepath': audio_filepath,
                        'duration': duration,
                        'text': text
                    }))
    with codecs.open(manifest_path, 'w', 'utf-8') as out_file:
        for line in json_lines:
            out_file.write(line + '\n')
|
||||||
|
|
||||||
|
|
||||||
|
def prepare_dataset(url, md5sum, target_dir, manifest_path):
    """Download, unpack and create the summary manifest file.

    The download and unpack steps are skipped when ``target_dir`` already
    contains a ``LibriSpeech`` entry; the manifest is rebuilt either way.

    :param url: Archive URL passed to ``download``.
    :param md5sum: Checksum passed to ``download``.
    :param target_dir: Directory that receives the archive and its contents.
    :param manifest_path: Output path handed to ``create_manifest``.
    """
    unpacked_root = os.path.join(target_dir, "LibriSpeech")
    if os.path.exists(unpacked_root):
        print("Skip downloading and unpacking. Data already exists in %s." %
              target_dir)
    else:
        # fetch the archive, then extract it in place
        archive_path = download(url, md5sum, target_dir)
        unpack(archive_path, target_dir)
    # create manifest json file
    create_manifest(target_dir, manifest_path)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Prepare the train-clean and dev-clean subsets using the parsed CLI args."""
    # expanduser() leaves paths without a leading '~' unchanged, so no
    # explicit prefix check is needed.
    args.target_dir = os.path.expanduser(args.target_dir)

    subsets = (
        ("train-clean", URL_TRAIN_CLEAN, MD5_TRAIN_CLEAN),
        ("dev-clean", URL_DEV_CLEAN, MD5_DEV_CLEAN),
    )
    for subset, subset_url, subset_md5 in subsets:
        prepare_dataset(
            url=subset_url,
            md5sum=subset_md5,
            target_dir=os.path.join(args.target_dir, subset),
            manifest_path=args.manifest_prefix + "." + subset)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
@ -0,0 +1,123 @@
|
|||||||
|
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
"""Prepare Aishell mandarin dataset
|
||||||
|
|
||||||
|
Download, unpack and create manifest files.
|
||||||
|
Manifest file is a json-format file with each line containing the
|
||||||
|
meta data (i.e. audio filepath, transcript and audio duration)
|
||||||
|
of each audio file in the data set.
|
||||||
|
"""
|
||||||
|
from __future__ import absolute_import
|
||||||
|
from __future__ import division
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
|
import os
|
||||||
|
import codecs
|
||||||
|
import soundfile
|
||||||
|
import json
|
||||||
|
import argparse
|
||||||
|
from utils.utility import download, unpack
|
||||||
|
|
||||||
|
DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')
|
||||||
|
|
||||||
|
URL_ROOT = 'https://www.openslr.org/resources/17'
|
||||||
|
DATA_URL = URL_ROOT + '/musan.tar.gz'
|
||||||
|
MD5_DATA = ''
|
||||||
|
|
||||||
|
parser = argparse.ArgumentParser(description=__doc__)
|
||||||
|
parser.add_argument(
|
||||||
|
"--target_dir",
|
||||||
|
default=DATA_HOME + "/musan",
|
||||||
|
type=str,
|
||||||
|
help="Directory to save the dataset. (default: %(default)s)")
|
||||||
|
parser.add_argument(
|
||||||
|
"--manifest_prefix",
|
||||||
|
default="manifest",
|
||||||
|
type=str,
|
||||||
|
help="Filepath prefix for output manifests. (default: %(default)s)")
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
def create_manifest(data_dir, manifest_path_prefix):
    """Write one JSON-lines manifest per data split found under ``data_dir``.

    Transcripts are read from
    ``<data_dir>/transcript/aishell_transcript_v0.8.txt`` where each
    non-blank line is ``<audio-id> <text>``. For each of the ``train``,
    ``dev`` and ``test`` splits, every file under ``<data_dir>/wav/<split>``
    whose name (without extension) has a transcript is recorded as a JSON
    object with its path, duration in seconds and text, written to
    ``<manifest_path_prefix>.<split>``.

    :param data_dir: Root of the unpacked dataset.
    :type data_dir: str
    :param manifest_path_prefix: Prefix of the three output manifest paths.
    :type manifest_path_prefix: str
    """
    print("Creating manifest %s ..." % manifest_path_prefix)
    transcript_path = os.path.join(data_dir, 'transcript',
                                   'aishell_transcript_v0.8.txt')
    transcript_dict = {}
    # Context manager ensures the transcript file is closed after reading.
    with codecs.open(transcript_path, 'r', 'utf-8') as transcript_file:
        for line in transcript_file:
            line = line.strip()
            if not line:
                continue
            audio_id, text = line.split(' ', 1)
            # remove whitespace inside the transcript
            text = ''.join(text.split())
            transcript_dict[audio_id] = text

    for data_type in ('train', 'dev', 'test'):
        json_lines = []
        audio_dir = os.path.join(data_dir, 'wav', data_type)
        for subfolder, _, filelist in sorted(os.walk(audio_dir)):
            for fname in filelist:
                audio_path = os.path.join(subfolder, fname)
                # splitext handles extensions of any length, unlike a
                # fixed four-character slice.
                audio_id = os.path.splitext(fname)[0]
                # if no transcription for audio then skipped
                if audio_id not in transcript_dict:
                    continue
                audio_data, samplerate = soundfile.read(audio_path)
                duration = float(len(audio_data)) / samplerate
                json_lines.append(
                    json.dumps(
                        {
                            'audio_filepath': audio_path,
                            'duration': duration,
                            'text': transcript_dict[audio_id]
                        },
                        # keep non-ASCII transcript characters readable
                        ensure_ascii=False))
        manifest_path = manifest_path_prefix + '.' + data_type
        with codecs.open(manifest_path, 'w', 'utf-8') as fout:
            for line in json_lines:
                fout.write(line + '\n')
|
||||||
|
|
||||||
|
|
||||||
|
def prepare_dataset(url, md5sum, target_dir, manifest_path):
    """Download, unpack and create manifest file.

    Nothing is downloaded when ``<target_dir>/data_aishell`` already exists;
    the manifests are regenerated in both cases.

    :param url: Archive URL passed to ``download``.
    :param md5sum: Checksum passed to ``download``.
    :param target_dir: Directory that receives the archive and its contents.
    :param manifest_path: Manifest path prefix handed to ``create_manifest``.
    """
    data_dir = os.path.join(target_dir, 'data_aishell')
    if os.path.exists(data_dir):
        print("Skip downloading and unpacking. Data already exists in %s." %
              target_dir)
    else:
        archive_path = download(url, md5sum, target_dir)
        unpack(archive_path, target_dir)
        # unpack all audio tar files; sorted() consumes the whole walk first,
        # so files extracted here are not themselves visited.
        wav_root = os.path.join(data_dir, 'wav')
        for folder, _, names in sorted(os.walk(wav_root)):
            for name in names:
                # NOTE(review): meaning of the third argument is defined by
                # the project's unpack() helper - confirm there.
                unpack(os.path.join(folder, name), folder, True)
    create_manifest(data_dir, manifest_path)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Prepare the dataset described by the module-level CLI arguments."""
    # expanduser() leaves paths without a leading '~' unchanged, so no
    # explicit prefix check is needed.
    args.target_dir = os.path.expanduser(args.target_dir)

    prepare_dataset(DATA_URL, MD5_DATA, args.target_dir, args.manifest_prefix)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
@ -0,0 +1,123 @@
|
|||||||
|
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
"""Prepare Aishell mandarin dataset
|
||||||
|
|
||||||
|
Download, unpack and create manifest files.
|
||||||
|
Manifest file is a json-format file with each line containing the
|
||||||
|
meta data (i.e. audio filepath, transcript and audio duration)
|
||||||
|
of each audio file in the data set.
|
||||||
|
"""
|
||||||
|
from __future__ import absolute_import
|
||||||
|
from __future__ import division
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
|
import os
|
||||||
|
import codecs
|
||||||
|
import soundfile
|
||||||
|
import json
|
||||||
|
import argparse
|
||||||
|
from data_utils.utility import download, unpack
|
||||||
|
|
||||||
|
DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')
|
||||||
|
|
||||||
|
URL_ROOT = 'http://www.openslr.org/resources/28'
|
||||||
|
DATA_URL = URL_ROOT + '/rirs_noises.zip'
|
||||||
|
MD5_DATA = ''
|
||||||
|
|
||||||
|
parser = argparse.ArgumentParser(description=__doc__)
|
||||||
|
parser.add_argument(
|
||||||
|
"--target_dir",
|
||||||
|
default=DATA_HOME + "/Aishell",
|
||||||
|
type=str,
|
||||||
|
help="Directory to save the dataset. (default: %(default)s)")
|
||||||
|
parser.add_argument(
|
||||||
|
"--manifest_prefix",
|
||||||
|
default="manifest",
|
||||||
|
type=str,
|
||||||
|
help="Filepath prefix for output manifests. (default: %(default)s)")
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
def create_manifest(data_dir, manifest_path_prefix):
    """Write one JSON-lines manifest per data split found under ``data_dir``.

    Transcripts are read from
    ``<data_dir>/transcript/aishell_transcript_v0.8.txt`` where each
    non-blank line is ``<audio-id> <text>``. For each of the ``train``,
    ``dev`` and ``test`` splits, every file under ``<data_dir>/wav/<split>``
    whose name (without extension) has a transcript is recorded as a JSON
    object with its path, duration in seconds and text, written to
    ``<manifest_path_prefix>.<split>``.

    :param data_dir: Root of the unpacked dataset.
    :type data_dir: str
    :param manifest_path_prefix: Prefix of the three output manifest paths.
    :type manifest_path_prefix: str
    """
    print("Creating manifest %s ..." % manifest_path_prefix)
    transcript_path = os.path.join(data_dir, 'transcript',
                                   'aishell_transcript_v0.8.txt')
    transcript_dict = {}
    # Context manager ensures the transcript file is closed after reading.
    with codecs.open(transcript_path, 'r', 'utf-8') as transcript_file:
        for line in transcript_file:
            line = line.strip()
            if not line:
                continue
            audio_id, text = line.split(' ', 1)
            # remove whitespace inside the transcript
            text = ''.join(text.split())
            transcript_dict[audio_id] = text

    for data_type in ('train', 'dev', 'test'):
        json_lines = []
        audio_dir = os.path.join(data_dir, 'wav', data_type)
        for subfolder, _, filelist in sorted(os.walk(audio_dir)):
            for fname in filelist:
                audio_path = os.path.join(subfolder, fname)
                # splitext handles extensions of any length, unlike a
                # fixed four-character slice.
                audio_id = os.path.splitext(fname)[0]
                # if no transcription for audio then skipped
                if audio_id not in transcript_dict:
                    continue
                audio_data, samplerate = soundfile.read(audio_path)
                duration = float(len(audio_data)) / samplerate
                json_lines.append(
                    json.dumps(
                        {
                            'audio_filepath': audio_path,
                            'duration': duration,
                            'text': transcript_dict[audio_id]
                        },
                        # keep non-ASCII transcript characters readable
                        ensure_ascii=False))
        manifest_path = manifest_path_prefix + '.' + data_type
        with codecs.open(manifest_path, 'w', 'utf-8') as fout:
            for line in json_lines:
                fout.write(line + '\n')
|
||||||
|
|
||||||
|
|
||||||
|
def prepare_dataset(url, md5sum, target_dir, manifest_path):
    """Download, unpack and create manifest file.

    Nothing is downloaded when ``<target_dir>/data_aishell`` already exists;
    the manifests are regenerated in both cases.

    :param url: Archive URL passed to ``download``.
    :param md5sum: Checksum passed to ``download``.
    :param target_dir: Directory that receives the archive and its contents.
    :param manifest_path: Manifest path prefix handed to ``create_manifest``.
    """
    data_dir = os.path.join(target_dir, 'data_aishell')
    if os.path.exists(data_dir):
        print("Skip downloading and unpacking. Data already exists in %s." %
              target_dir)
    else:
        archive_path = download(url, md5sum, target_dir)
        unpack(archive_path, target_dir)
        # unpack all audio tar files; sorted() consumes the whole walk first,
        # so files extracted here are not themselves visited.
        wav_root = os.path.join(data_dir, 'wav')
        for folder, _, names in sorted(os.walk(wav_root)):
            for name in names:
                # NOTE(review): meaning of the third argument is defined by
                # the project's unpack() helper - confirm there.
                unpack(os.path.join(folder, name), folder, True)
    create_manifest(data_dir, manifest_path)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Prepare the dataset described by the module-level CLI arguments."""
    # expanduser() leaves paths without a leading '~' unchanged, so no
    # explicit prefix check is needed.
    args.target_dir = os.path.expanduser(args.target_dir)

    prepare_dataset(DATA_URL, MD5_DATA, args.target_dir, args.manifest_prefix)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
@ -1,9 +1,12 @@
|
|||||||
#! /usr/bin/env bash
|
#! /usr/bin/env bash
|
||||||
|
|
||||||
|
TARGET_DIR=${MAIN_ROOT}/examples/dataset/voxforge
|
||||||
|
mkdir -p ${TARGET_DIR}
|
||||||
|
|
||||||
# download data, generate manifests
|
# download data, generate manifests
|
||||||
PYTHONPATH=../../:$PYTHONPATH python voxforge.py \
|
python ${MAIN_ROOT}/examples/dataset/voxforge/voxforge.py \
|
||||||
--manifest_prefix='./manifest' \
|
--manifest_prefix="${TARGET_DIR}/manifest" \
|
||||||
--target_dir='./dataset/VoxForge' \
|
--target_dir="${TARGET_DIR}" \
|
||||||
--is_merge_dialect=True \
|
--is_merge_dialect=True \
|
||||||
--dialects 'american' 'british' 'australian' 'european' 'irish' 'canadian' 'indian'
|
--dialects 'american' 'british' 'australian' 'european' 'irish' 'canadian' 'indian'
|
||||||
|
|
@ -0,0 +1,2 @@
|
|||||||
|
data
|
||||||
|
ckpt*
|
@ -0,0 +1,20 @@
|
|||||||
|
#! /usr/bin/env bash

# Download the English language model into data/lm (relative to the
# current working directory). Exits 1 if the download fails, 0 otherwise.

# Fail with a clear message instead of sourcing "/utils/utility.sh" when
# MAIN_ROOT is unset. The `download` helper used below is presumably
# provided by utility.sh - verify there.
. "${MAIN_ROOT:?MAIN_ROOT must be set}/utils/utility.sh"

DIR=data/lm
mkdir -p "${DIR}"

URL=https://deepspeech.bj.bcebos.com/en_lm/common_crawl_00.prune01111.trie.klm
MD5="099a601759d467cd0a8523ff939819c5"
TARGET="${DIR}/common_crawl_00.prune01111.trie.klm"

echo "Download language model ..."
# Quote the arguments so paths containing spaces reach the helper intact.
if ! download "${URL}" "${MD5}" "${TARGET}"; then
    echo "Fail to download the language model!"
    exit 1
fi

exit 0
|
@ -1,43 +1,21 @@
|
|||||||
#! /usr/bin/env bash
|
#! /usr/bin/env bash
|
||||||
|
|
||||||
# download language model
|
# download language model
|
||||||
cd ${MAIN_ROOT}/models/lm > /dev/null
|
bash local/download_lm_en.sh
|
||||||
bash download_lm_en.sh
|
|
||||||
if [ $? -ne 0 ]; then
|
if [ $? -ne 0 ]; then
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
cd - > /dev/null
|
|
||||||
|
|
||||||
|
python3 -u ${BIN_DIR}/infer.py \
|
||||||
|
--device 'gpu' \
|
||||||
|
--nproc 1 \
|
||||||
|
--config conf/deepspeech2.yaml \
|
||||||
|
--output ckpt
|
||||||
|
|
||||||
# infer
|
|
||||||
CUDA_VISIBLE_DEVICES=0 \
|
|
||||||
python3 -u ${MAIN_ROOT}/infer.py \
|
|
||||||
--num_samples=10 \
|
|
||||||
--beam_size=500 \
|
|
||||||
--num_proc_bsearch=8 \
|
|
||||||
--num_conv_layers=2 \
|
|
||||||
--num_rnn_layers=3 \
|
|
||||||
--rnn_layer_size=2048 \
|
|
||||||
--alpha=2.5 \
|
|
||||||
--beta=0.3 \
|
|
||||||
--cutoff_prob=1.0 \
|
|
||||||
--cutoff_top_n=40 \
|
|
||||||
--use_gru=False \
|
|
||||||
--use_gpu=True \
|
|
||||||
--share_rnn_weights=True \
|
|
||||||
--infer_manifest="data/manifest.test-clean" \
|
|
||||||
--mean_std_path="data/mean_std.npz" \
|
|
||||||
--vocab_path="data/vocab.txt" \
|
|
||||||
--model_path="checkpoints/step_final" \
|
|
||||||
--lang_model_path="${MAIN_ROOT}/models/lm/common_crawl_00.prune01111.trie.klm" \
|
|
||||||
--decoding_method="ctc_beam_search" \
|
|
||||||
--error_rate_type="wer" \
|
|
||||||
--specgram_type="linear"
|
|
||||||
|
|
||||||
if [ $? -ne 0 ]; then
|
if [ $? -ne 0 ]; then
|
||||||
echo "Failed in inference!"
|
echo "Failed in inference!"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
||||||
exit 0
|
exit 0
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue