diff --git a/dataset/librispeech/librispeech.py b/dataset/librispeech/librispeech.py index 44567b0cf..2f5f9016c 100644 --- a/dataset/librispeech/librispeech.py +++ b/dataset/librispeech/librispeech.py @@ -30,6 +30,7 @@ import soundfile from paddlespeech.dataset.download import download from paddlespeech.dataset.download import unpack +from paddlespeech.utils.argparse import strtobool URL_ROOT = "http://openslr.elda.org/resources/12" #URL_ROOT = "https://openslr.magicdatatech.com/resources/12" @@ -63,7 +64,7 @@ parser.add_argument( parser.add_argument( "--full_download", default="True", - type=distutils.util.strtobool, + type=strtobool, help="Download all datasets for Librispeech." " If False, only download a minimal requirement (test-clean, dev-clean" " train-clean-100). (default: %(default)s)") diff --git a/examples/ami/sd0/local/ami_prepare.py b/examples/ami/sd0/local/ami_prepare.py index 1f02afe00..e38eb7131 100644 --- a/examples/ami/sd0/local/ami_prepare.py +++ b/examples/ami/sd0/local/ami_prepare.py @@ -28,7 +28,8 @@ import xml.etree.ElementTree as et from ami_splits import get_AMI_split from dataio import load_pkl from dataio import save_pkl -from distutils.util import strtobool + +from paddlespeech.utils.argparse import strtobool logger = logging.getLogger(__name__) SAMPLERATE = 16000 diff --git a/paddlespeech/s2t/decoders/recog_bin.py b/paddlespeech/s2t/decoders/recog_bin.py index 37b49f3a0..829b2b4a7 100644 --- a/paddlespeech/s2t/decoders/recog_bin.py +++ b/paddlespeech/s2t/decoders/recog_bin.py @@ -21,7 +21,8 @@ import sys import configargparse import numpy as np -from distutils.util import strtobool + +from paddlespeech.utils.argparse import strtobool def get_parser(): diff --git a/paddlespeech/s2t/exps/whisper/test_wav.py b/paddlespeech/s2t/exps/whisper/test_wav.py index e04eec4f2..d9c32a406 100644 --- a/paddlespeech/s2t/exps/whisper/test_wav.py +++ b/paddlespeech/s2t/exps/whisper/test_wav.py @@ -27,6 +27,7 @@ from paddlespeech.s2t.models.whisper import 
def strtobool(x):
    """Return the truth value of ``x`` as a real ``bool``.

    The underlying ``dist_strtobool`` helper reports its result as an
    integer, which is confusing for callers, so the result is normalised
    to ``bool`` here.
    """
    int_flag = dist_strtobool(x)
    return bool(int_flag)
def strtobool(value):
    """Convert a truth-value string to an integer flag (1 = true, 0 = false).

    This is a drop-in replacement for ``distutils.util.strtobool``; the
    ``distutils`` package is deprecated since Python 3.10 and removed in
    Python 3.12, so the helper is re-implemented here to keep existing
    command-line code working.

    Matching is case-insensitive and ignores surrounding whitespace:

    - true values:  "y", "yes", "t", "true", "on", "1"
    - false values: "n", "no", "f", "false", "off", "0"

    Args:
        value: The value to convert. A ``bool`` is passed through as
            ``int(value)``; any other object is converted with ``str()``
            before matching.

    Returns:
        int: 1 for a true value, 0 for a false value.

    Raises:
        ValueError: If ``value`` is not one of the recognised spellings.
            When used as an argparse ``type=``, argparse reports this as
            a regular usage error instead of silently treating a typo as
            False.
    """
    true_values = ('y', 'yes', 't', 'true', 'on', '1')
    false_values = ('n', 'no', 'f', 'false', 'off', '0')

    # argparse only runs ``type`` on strings, but programmatic callers may
    # hand over a real bool (e.g. a default value); accept it unchanged.
    if isinstance(value, bool):
        return int(value)

    text = str(value).strip().lower()
    if text in true_values:
        return 1
    if text in false_values:
        return 0
    raise ValueError("invalid truth value %r" % (value, ))
@@ -91,7 +114,7 @@ def add_arguments(argname, type, default, help, argparser, **kwargs): add_argument("name", str, "Jonh", "User name.", parser) args = parser.parse_args() """ - type = distutils.util.strtobool if type == bool else type + type = strtobool if type == bool else type argparser.add_argument( "--" + argname, default=default, diff --git a/paddlespeech/vector/cluster/diarization.py b/paddlespeech/vector/cluster/diarization.py index b47b3f248..1de055c85 100644 --- a/paddlespeech/vector/cluster/diarization.py +++ b/paddlespeech/vector/cluster/diarization.py @@ -24,7 +24,6 @@ import warnings import numpy as np import scipy import sklearn -from distutils.util import strtobool from scipy import linalg from scipy import sparse from scipy.sparse.csgraph import connected_components @@ -34,6 +33,8 @@ from sklearn.cluster import SpectralClustering from sklearn.cluster._kmeans import k_means from sklearn.neighbors import kneighbors_graph +from paddlespeech.utils.argparse import strtobool + def _graph_connected_component(graph, node_id): """ diff --git a/utils/DER.py b/utils/DER.py index 59bcbec47..d12620199 100755 --- a/utils/DER.py +++ b/utils/DER.py @@ -28,7 +28,8 @@ import re import subprocess import numpy as np -from distutils.util import strtobool + +from paddlespeech.utils.argparse import strtobool FILE_IDS = re.compile(r"(?<=Speaker Diarization for).+(?=\*\*\*)") SCORED_SPEAKER_TIME = re.compile(r"(?<=SCORED SPEAKER TIME =)[\d.]+") diff --git a/utils/addjson.py b/utils/addjson.py index e1be7ab31..f90f7afab 100755 --- a/utils/addjson.py +++ b/utils/addjson.py @@ -11,9 +11,10 @@ import json import logging import sys -from distutils.util import strtobool from espnet.utils.cli_utils import get_commandline_args +from paddlespeech.utils.argparse import strtobool + is_python2 = sys.version_info[0] == 2 diff --git a/utils/apply-cmvn.py b/utils/apply-cmvn.py index fa69ff8e0..872d69608 100755 --- a/utils/apply-cmvn.py +++ b/utils/apply-cmvn.py @@ -4,13 +4,13 @@ import 
logging import kaldiio import numpy -from distutils.util import strtobool from paddlespeech.audio.transform.cmvn import CMVN from paddlespeech.s2t.utils.cli_readers import file_reader_helper from paddlespeech.s2t.utils.cli_utils import get_commandline_args from paddlespeech.s2t.utils.cli_utils import is_scipy_wav_style from paddlespeech.s2t.utils.cli_writers import file_writer_helper +from paddlespeech.utils.argparse import strtobool def get_parser(): diff --git a/utils/copy-feats.py b/utils/copy-feats.py index 89ea30f97..8f38dc8ee 100755 --- a/utils/copy-feats.py +++ b/utils/copy-feats.py @@ -2,13 +2,12 @@ import argparse import logging -from distutils.util import strtobool - from paddlespeech.audio.transform.transformation import Transformation from paddlespeech.s2t.utils.cli_readers import file_reader_helper from paddlespeech.s2t.utils.cli_utils import get_commandline_args from paddlespeech.s2t.utils.cli_utils import is_scipy_wav_style from paddlespeech.s2t.utils.cli_writers import file_writer_helper +from paddlespeech.utils.argparse import strtobool def get_parser(): diff --git a/utils/merge_scp2json.py b/utils/merge_scp2json.py index 99db6bac8..98f6cae84 100755 --- a/utils/merge_scp2json.py +++ b/utils/merge_scp2json.py @@ -7,9 +7,8 @@ import logging import sys from io import open -from distutils.util import strtobool - from paddlespeech.s2t.utils.cli_utils import get_commandline_args +from paddlespeech.utils.argparse import strtobool PY2 = sys.version_info[0] == 2 sys.stdin = codecs.getreader("utf-8")(sys.stdin if PY2 else sys.stdin.buffer)