format code,test=doc

pull/1483/head
Hui Zhang 2 years ago
parent 0a5624fe61
commit 6b1fe70100

@ -50,12 +50,13 @@ repos:
entry: bash .pre-commit-hooks/clang-format.hook -i entry: bash .pre-commit-hooks/clang-format.hook -i
language: system language: system
files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|cuh|proto)$ files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|cuh|proto)$
exclude: (?=speechx/speechx/kaldi).*(\.cpp|\.cc|\.h|\.py)$
- id: copyright_checker - id: copyright_checker
name: copyright_checker name: copyright_checker
entry: python .pre-commit-hooks/copyright-check.hook entry: python .pre-commit-hooks/copyright-check.hook
language: system language: system
files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$ files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$
exclude: (?=third_party|pypinyin).*(\.cpp|\.h|\.py)$ exclude: (?=third_party|pypinyin|speechx/speechx/kaldi).*(\.cpp|\.cc|\.h|\.py)$
- repo: https://github.com/asottile/reorder_python_imports - repo: https://github.com/asottile/reorder_python_imports
rev: v2.4.0 rev: v2.4.0
hooks: hooks:

@ -80,6 +80,7 @@ parser.add_argument(
args = parser.parse_args() args = parser.parse_args()
def create_manifest(data_dir, manifest_path_prefix): def create_manifest(data_dir, manifest_path_prefix):
print("Creating manifest %s ..." % manifest_path_prefix) print("Creating manifest %s ..." % manifest_path_prefix)
json_lines = [] json_lines = []
@ -128,6 +129,7 @@ def create_manifest(data_dir, manifest_path_prefix):
print(f"{total_text / total_sec} text/sec", file=f) print(f"{total_text / total_sec} text/sec", file=f)
print(f"{total_sec / total_num} sec/utt", file=f) print(f"{total_sec / total_num} sec/utt", file=f)
def prepare_dataset(base_url, data_list, target_dir, manifest_path, def prepare_dataset(base_url, data_list, target_dir, manifest_path,
target_data): target_data):
if not os.path.exists(target_dir): if not os.path.exists(target_dir):
@ -164,6 +166,7 @@ def prepare_dataset(base_url, data_list, target_dir, manifest_path,
# create the manifest file # create the manifest file
create_manifest(data_dir=target_dir, manifest_path_prefix=manifest_path) create_manifest(data_dir=target_dir, manifest_path_prefix=manifest_path)
def main(): def main():
if args.target_dir.startswith('~'): if args.target_dir.startswith('~'):
args.target_dir = os.path.expanduser(args.target_dir) args.target_dir = os.path.expanduser(args.target_dir)
@ -184,5 +187,6 @@ def main():
print("Manifest prepare done!") print("Manifest prepare done!")
if __name__ == '__main__': if __name__ == '__main__':
main() main()

@ -22,19 +22,17 @@ Authors
* qingenz123@126.com (Qingen ZHAO) 2022 * qingenz123@126.com (Qingen ZHAO) 2022
""" """
import os
import logging
import argparse import argparse
import xml.etree.ElementTree as et
import glob import glob
import json import json
from ami_splits import get_AMI_split import logging
import os
import xml.etree.ElementTree as et
from distutils.util import strtobool from distutils.util import strtobool
from dataio import ( from ami_splits import get_AMI_split
load_pkl, from dataio import load_pkl
save_pkl, ) from dataio import save_pkl
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
SAMPLERATE = 16000 SAMPLERATE = 16000

@ -12,28 +12,30 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
""" """
Make VoxCeleb1 trial of kaldi format Make VoxCeleb1 trial of kaldi format
this script creat the test trial from kaldi trial voxceleb1_test_v2.txt or official trial veri_test2.txt this script creat the test trial from kaldi trial voxceleb1_test_v2.txt or official trial veri_test2.txt
to kaldi trial format to kaldi trial format
""" """
import argparse import argparse
import codecs import codecs
import os import os
parser = argparse.ArgumentParser(description=__doc__) parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--voxceleb_trial", parser.add_argument(
default="voxceleb1_test_v2", "--voxceleb_trial",
type=str, default="voxceleb1_test_v2",
help="VoxCeleb trial file. Default we use the kaldi trial voxceleb1_test_v2.txt") type=str,
parser.add_argument("--trial", help="VoxCeleb trial file. Default we use the kaldi trial voxceleb1_test_v2.txt"
default="data/test/trial", )
type=str, parser.add_argument(
help="Kaldi format trial file") "--trial",
default="data/test/trial",
type=str,
help="Kaldi format trial file")
args = parser.parse_args() args = parser.parse_args()
def main(voxceleb_trial, trial): def main(voxceleb_trial, trial):
""" """
VoxCeleb provide several trial file, which format is different with kaldi format. VoxCeleb provide several trial file, which format is different with kaldi format.
@ -58,7 +60,9 @@ def main(voxceleb_trial, trial):
""" """
print("Start convert the voxceleb trial to kaldi format") print("Start convert the voxceleb trial to kaldi format")
if not os.path.exists(voxceleb_trial): if not os.path.exists(voxceleb_trial):
raise RuntimeError("{} does not exist. Pleas input the correct file path".format(voxceleb_trial)) raise RuntimeError(
"{} does not exist. Pleas input the correct file path".format(
voxceleb_trial))
trial_dirname = os.path.dirname(trial) trial_dirname = os.path.dirname(trial)
if not os.path.exists(trial_dirname): if not os.path.exists(trial_dirname):
@ -66,9 +70,9 @@ def main(voxceleb_trial, trial):
with codecs.open(voxceleb_trial, 'r', encoding='utf-8') as f, \ with codecs.open(voxceleb_trial, 'r', encoding='utf-8') as f, \
codecs.open(trial, 'w', encoding='utf-8') as w: codecs.open(trial, 'w', encoding='utf-8') as w:
for line in f: for line in f:
target_or_nontarget, path1, path2 = line.strip().split() target_or_nontarget, path1, path2 = line.strip().split()
utt_id1 = "-".join(path1.split("/")) utt_id1 = "-".join(path1.split("/"))
utt_id2 = "-".join(path2.split("/")) utt_id2 = "-".join(path2.split("/"))
target = "nontarget" target = "nontarget"
@ -77,5 +81,6 @@ def main(voxceleb_trial, trial):
w.write("{} {} {}\n".format(utt_id1, utt_id2, target)) w.write("{} {} {}\n".format(utt_id1, utt_id2, target))
print("Convert the voxceleb trial to kaldi format successfully") print("Convert the voxceleb trial to kaldi format successfully")
if __name__ == "__main__": if __name__ == "__main__":
main(args.voxceleb_trial, args.trial) main(args.voxceleb_trial, args.trial)

@ -11,14 +11,3 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.

@ -413,7 +413,8 @@ class ASRExecutor(BaseExecutor):
def _check(self, audio_file: str, sample_rate: int, force_yes: bool): def _check(self, audio_file: str, sample_rate: int, force_yes: bool):
self.sample_rate = sample_rate self.sample_rate = sample_rate
if self.sample_rate != 16000 and self.sample_rate != 8000: if self.sample_rate != 16000 and self.sample_rate != 8000:
logger.error("invalid sample rate, please input --sr 8000 or --sr 16000") logger.error(
"invalid sample rate, please input --sr 8000 or --sr 16000")
return False return False
if isinstance(audio_file, (str, os.PathLike)): if isinstance(audio_file, (str, os.PathLike)):

@ -11,8 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from typing import List
from io import BytesIO from io import BytesIO
from typing import List
import numpy as np import numpy as np

@ -258,4 +258,4 @@ class ChainDataset(Dataset):
return dataset[i] return dataset[i]
i -= len(dataset) i -= len(dataset)
raise IndexError("dataset index out of range") raise IndexError("dataset index out of range")

@ -23,10 +23,11 @@ Credits
This code is adapted from https://github.com/nryant/dscore This code is adapted from https://github.com/nryant/dscore
""" """
import argparse import argparse
from distutils.util import strtobool
import os import os
import re import re
import subprocess import subprocess
from distutils.util import strtobool
import numpy as np import numpy as np
FILE_IDS = re.compile(r"(?<=Speaker Diarization for).+(?=\*\*\*)") FILE_IDS = re.compile(r"(?<=Speaker Diarization for).+(?=\*\*\*)")

Loading…
Cancel
Save