@ -20,12 +20,12 @@ of each audio file in the data set.
"""
import argparse
import codecs
import distutils.util
import io
import json
import os
from multiprocessing.pool import Pool
import soundfile
from utils.utility import download
@ -16,8 +16,8 @@ import os
import librosa
import numpy as np
from config import DEFAULT_TABLE
from logs import LOGGER
from paddlespeech.cli import VectorExecutor
vector_executor = VectorExecutor()
@ -26,8 +26,7 @@ def get_audios(path):
supported_formats = [".wav", ".mp3", ".ogg", ".flac", ".m4a"]
return [
item
for sublist in [[os.path.join(dir, file) for file in files]
item for sublist in [[os.path.join(dir, file) for file in files]
for dir, _, files in list(os.walk(path))]
for item in sublist if os.path.splitext(item)[1] in supported_formats
]
@ -24,11 +24,11 @@ import json
import logging
import xml.etree.ElementTree as et
from distutils.util import strtobool
from ami_splits import get_AMI_split
from dataio import load_pkl
from dataio import save_pkl
logger = logging.getLogger(__name__)
SAMPLERATE = 16000
@ -17,10 +17,10 @@ import logging
import random
import sys
import configargparse
def get_parser():
@ -14,9 +14,9 @@
# Modified from espnet(https://github.com/espnet/espnet)
from collections.abc import Sequence
from distutils.util import strtobool as dist_strtobool
import numpy
def strtobool(x):
@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains common utility functions."""
import math
@ -21,6 +20,7 @@ from contextlib import contextmanager
from pprint import pformat
from typing import List
import paddle
@ -18,11 +18,11 @@ A few sklearn functions are modified in this script as per requirement.
import warnings
import scipy
import sklearn
from scipy import sparse
from scipy.sparse.csgraph import connected_components
from scipy.sparse.csgraph import laplacian as csgraph_laplacian
@ -26,9 +26,9 @@ import argparse
import re
import subprocess
FILE_IDS = re.compile(r"(?<=Speaker Diarization for).+(?=\*\*\*)")
SCORED_SPEAKER_TIME = re.compile(r"(?<=SCORED SPEAKER TIME =)[\d.]+")
@ -10,8 +10,8 @@ import codecs
from espnet.utils.cli_utils import get_commandline_args
is_python2 = sys.version_info[0] == 2
@ -1,10 +1,10 @@
#!/usr/bin/env python3
import kaldiio
from paddlespeech.s2t.transform.cmvn import CMVN
from paddlespeech.s2t.utils.cli_readers import file_reader_helper
@ -1,6 +1,7 @@
from paddlespeech.s2t.transform.transformation import Transformation
@ -5,9 +5,10 @@ import codecs
from io import open
from paddlespeech.s2t.utils.cli_utils import get_commandline_args
PY2 = sys.version_info[0] == 2