format code,test=doc

pull/1483/head
Hui Zhang 2 years ago
parent 0a5624fe61
commit 6b1fe70100

@ -50,12 +50,13 @@ repos:
entry: bash .pre-commit-hooks/clang-format.hook -i
language: system
files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|cuh|proto)$
exclude: (?=speechx/speechx/kaldi).*(\.cpp|\.cc|\.h|\.py)$
- id: copyright_checker
name: copyright_checker
entry: python .pre-commit-hooks/copyright-check.hook
language: system
files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$
exclude: (?=third_party|pypinyin).*(\.cpp|\.h|\.py)$
exclude: (?=third_party|pypinyin|speechx/speechx/kaldi).*(\.cpp|\.cc|\.h|\.py)$
- repo: https://github.com/asottile/reorder_python_imports
rev: v2.4.0
hooks:

@ -80,6 +80,7 @@ parser.add_argument(
args = parser.parse_args()
def create_manifest(data_dir, manifest_path_prefix):
print("Creating manifest %s ..." % manifest_path_prefix)
json_lines = []
@ -128,6 +129,7 @@ def create_manifest(data_dir, manifest_path_prefix):
print(f"{total_text / total_sec} text/sec", file=f)
print(f"{total_sec / total_num} sec/utt", file=f)
def prepare_dataset(base_url, data_list, target_dir, manifest_path,
target_data):
if not os.path.exists(target_dir):
@ -164,6 +166,7 @@ def prepare_dataset(base_url, data_list, target_dir, manifest_path,
# create the manifest file
create_manifest(data_dir=target_dir, manifest_path_prefix=manifest_path)
def main():
if args.target_dir.startswith('~'):
args.target_dir = os.path.expanduser(args.target_dir)
@ -184,5 +187,6 @@ def main():
print("Manifest prepare done!")
if __name__ == '__main__':
main()

@ -22,19 +22,17 @@ Authors
* qingenz123@126.com (Qingen ZHAO) 2022
"""
import os
import logging
import argparse
import xml.etree.ElementTree as et
import glob
import json
from ami_splits import get_AMI_split
import logging
import os
import xml.etree.ElementTree as et
from distutils.util import strtobool
from dataio import (
load_pkl,
save_pkl, )
from ami_splits import get_AMI_split
from dataio import load_pkl
from dataio import save_pkl
logger = logging.getLogger(__name__)
SAMPLERATE = 16000

@ -12,28 +12,30 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Make a VoxCeleb1 trial file in kaldi format.
This script creates the test trial from the kaldi trial voxceleb1_test_v2.txt or the official trial veri_test2.txt
and converts it to the kaldi trial format.
"""
import argparse
import codecs
import os
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--voxceleb_trial",
default="voxceleb1_test_v2",
type=str,
help="VoxCeleb trial file. Default we use the kaldi trial voxceleb1_test_v2.txt")
parser.add_argument("--trial",
default="data/test/trial",
type=str,
help="Kaldi format trial file")
parser.add_argument(
"--voxceleb_trial",
default="voxceleb1_test_v2",
type=str,
help="VoxCeleb trial file. Default we use the kaldi trial voxceleb1_test_v2.txt"
)
parser.add_argument(
"--trial",
default="data/test/trial",
type=str,
help="Kaldi format trial file")
args = parser.parse_args()
def main(voxceleb_trial, trial):
"""
VoxCeleb provides several trial files, whose format differs from the kaldi format.
@ -58,7 +60,9 @@ def main(voxceleb_trial, trial):
"""
print("Start convert the voxceleb trial to kaldi format")
if not os.path.exists(voxceleb_trial):
raise RuntimeError("{} does not exist. Pleas input the correct file path".format(voxceleb_trial))
raise RuntimeError(
"{} does not exist. Pleas input the correct file path".format(
voxceleb_trial))
trial_dirname = os.path.dirname(trial)
if not os.path.exists(trial_dirname):
@ -66,9 +70,9 @@ def main(voxceleb_trial, trial):
with codecs.open(voxceleb_trial, 'r', encoding='utf-8') as f, \
codecs.open(trial, 'w', encoding='utf-8') as w:
for line in f:
for line in f:
target_or_nontarget, path1, path2 = line.strip().split()
utt_id1 = "-".join(path1.split("/"))
utt_id2 = "-".join(path2.split("/"))
target = "nontarget"
@ -77,5 +81,6 @@ def main(voxceleb_trial, trial):
w.write("{} {} {}\n".format(utt_id1, utt_id2, target))
print("Convert the voxceleb trial to kaldi format successfully")
if __name__ == "__main__":
main(args.voxceleb_trial, args.trial)

@ -11,14 +11,3 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

@ -413,7 +413,8 @@ class ASRExecutor(BaseExecutor):
def _check(self, audio_file: str, sample_rate: int, force_yes: bool):
self.sample_rate = sample_rate
if self.sample_rate != 16000 and self.sample_rate != 8000:
logger.error("invalid sample rate, please input --sr 8000 or --sr 16000")
logger.error(
"invalid sample rate, please input --sr 8000 or --sr 16000")
return False
if isinstance(audio_file, (str, os.PathLike)):

@ -11,8 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import List
from io import BytesIO
from typing import List
import numpy as np

@ -258,4 +258,4 @@ class ChainDataset(Dataset):
return dataset[i]
i -= len(dataset)
raise IndexError("dataset index out of range")
raise IndexError("dataset index out of range")

@ -23,10 +23,11 @@ Credits
This code is adapted from https://github.com/nryant/dscore
"""
import argparse
from distutils.util import strtobool
import os
import re
import subprocess
from distutils.util import strtobool
import numpy as np
FILE_IDS = re.compile(r"(?<=Speaker Diarization for).+(?=\*\*\*)")

Loading…
Cancel
Save