【Hackathon 7th】add implementation of strtobool (#3877)

* add implementation of strtobool

* Apply suggestions from code review

* Apply suggestions from code review

* Apply suggestions from code review
pull/3882/head
张春乔 10 months ago committed by GitHub
parent 21b55419c7
commit 290d161d8a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -30,6 +30,7 @@ import soundfile
from paddlespeech.dataset.download import download from paddlespeech.dataset.download import download
from paddlespeech.dataset.download import unpack from paddlespeech.dataset.download import unpack
from paddlespeech.utils.argparse import strtobool
URL_ROOT = "http://openslr.elda.org/resources/12" URL_ROOT = "http://openslr.elda.org/resources/12"
#URL_ROOT = "https://openslr.magicdatatech.com/resources/12" #URL_ROOT = "https://openslr.magicdatatech.com/resources/12"
@ -63,7 +64,7 @@ parser.add_argument(
parser.add_argument( parser.add_argument(
"--full_download", "--full_download",
default="True", default="True",
type=distutils.util.strtobool, type=strtobool,
help="Download all datasets for Librispeech." help="Download all datasets for Librispeech."
" If False, only download a minimal requirement (test-clean, dev-clean" " If False, only download a minimal requirement (test-clean, dev-clean"
" train-clean-100). (default: %(default)s)") " train-clean-100). (default: %(default)s)")

@ -28,7 +28,8 @@ import xml.etree.ElementTree as et
from ami_splits import get_AMI_split from ami_splits import get_AMI_split
from dataio import load_pkl from dataio import load_pkl
from dataio import save_pkl from dataio import save_pkl
from distutils.util import strtobool
from paddlespeech.utils.argparse import strtobool
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
SAMPLERATE = 16000 SAMPLERATE = 16000

@ -21,7 +21,8 @@ import sys
import configargparse import configargparse
import numpy as np import numpy as np
from distutils.util import strtobool
from paddlespeech.utils.argparse import strtobool
def get_parser(): def get_parser():

@ -27,6 +27,7 @@ from paddlespeech.s2t.models.whisper import transcribe
from paddlespeech.s2t.models.whisper import Whisper from paddlespeech.s2t.models.whisper import Whisper
from paddlespeech.s2t.training.cli import default_argument_parser from paddlespeech.s2t.training.cli import default_argument_parser
from paddlespeech.s2t.utils.log import Log from paddlespeech.s2t.utils.log import Log
from paddlespeech.utils.argparse import strtobool
logger = Log(__name__).getlog() logger = Log(__name__).getlog()
@ -103,10 +104,7 @@ if __name__ == "__main__":
parser.add_argument( parser.add_argument(
"--audio_file", type=str, help="path of the input audio file") "--audio_file", type=str, help="path of the input audio file")
parser.add_argument( parser.add_argument(
"--debug", "--debug", type=strtobool, default=False, help="for debug.")
type=distutils.util.strtobool,
default=False,
help="for debug.")
args = parser.parse_args() args = parser.parse_args()
config = CfgNode(new_allowed=True) config = CfgNode(new_allowed=True)

@ -16,6 +16,8 @@ import argparse
import distutils import distutils
from yacs.config import CfgNode from yacs.config import CfgNode
from paddlespeech.utils.argparse import strtobool
class ExtendAction(argparse.Action): class ExtendAction(argparse.Action):
""" """
@ -73,7 +75,7 @@ def default_argument_parser(parser=None):
'--conf', type=open, action=LoadFromFile, help="config file.") '--conf', type=open, action=LoadFromFile, help="config file.")
parser.add_argument( parser.add_argument(
"--debug", "--debug",
type=distutils.util.strtobool, type=strtobool,
default=False, default=False,
help="logging with debug mode.") help="logging with debug mode.")
parser.add_argument( parser.add_argument(

@ -16,11 +16,12 @@ import sys
from collections.abc import Sequence from collections.abc import Sequence
import numpy import numpy
from distutils.util import strtobool as dist_strtobool
from paddlespeech.utils.argparse import strtobool as dist_strtobool
def strtobool(x): def strtobool(x):
# distutils.util.strtobool returns integer, but it's confusing, # paddlespeech.utils.argparse.strtobool returns integer, but it's confusing,
return bool(dist_strtobool(x)) return bool(dist_strtobool(x))

@ -18,7 +18,9 @@ from typing import Text
import distutils import distutils
__all__ = ["print_arguments", "add_arguments", "get_commandline_args"] __all__ = [
"print_arguments", "add_arguments", "get_commandline_args", "strtobool"
]
def get_commandline_args(): def get_commandline_args():
@ -80,6 +82,27 @@ def print_arguments(args, info=None):
print("-----------------------------------------------------------") print("-----------------------------------------------------------")
def strtobool(value):
    """Convert a string value to an integer boolean (1 for True, 0 for False).

    Replacement for ``distutils.util.strtobool``: the distutils module has
    been deprecated since Python 3.10, so the function is implemented here to
    keep existing code working.

    The following strings are recognized as True (case insensitive, with
    surrounding whitespace ignored), matching the truthy spellings accepted
    by ``distutils.util.strtobool``:
        - "y", "yes"
        - "t", "true"
        - "on"
        - "1"
    All other strings are considered False. Unlike the distutils version, an
    unrecognized string does not raise ``ValueError``.

    Args:
        value (str|bool): Text to interpret. A bool is accepted as well and
            is simply converted to 0/1.

    Returns:
        int: 1 if ``value`` is recognized as true, otherwise 0.
    """
    # A real bool needs no parsing; just normalize it to the 0/1 convention.
    if isinstance(value, bool):
        return int(value)
    normalized = value.strip().lower()
    # Keep the full distutils truthy set so flags such as "--debug on" or
    # "--debug y" keep their previous meaning after the migration.
    return 1 if normalized in ('y', 'yes', 't', 'true', 'on', '1') else 0
def add_arguments(argname, type, default, help, argparser, **kwargs): def add_arguments(argname, type, default, help, argparser, **kwargs):
"""Add argparse's argument. """Add argparse's argument.
@ -91,7 +114,7 @@ def add_arguments(argname, type, default, help, argparser, **kwargs):
add_argument("name", str, "Jonh", "User name.", parser) add_argument("name", str, "Jonh", "User name.", parser)
args = parser.parse_args() args = parser.parse_args()
""" """
type = distutils.util.strtobool if type == bool else type type = strtobool if type == bool else type
argparser.add_argument( argparser.add_argument(
"--" + argname, "--" + argname,
default=default, default=default,

@ -24,7 +24,6 @@ import warnings
import numpy as np import numpy as np
import scipy import scipy
import sklearn import sklearn
from distutils.util import strtobool
from scipy import linalg from scipy import linalg
from scipy import sparse from scipy import sparse
from scipy.sparse.csgraph import connected_components from scipy.sparse.csgraph import connected_components
@ -34,6 +33,8 @@ from sklearn.cluster import SpectralClustering
from sklearn.cluster._kmeans import k_means from sklearn.cluster._kmeans import k_means
from sklearn.neighbors import kneighbors_graph from sklearn.neighbors import kneighbors_graph
from paddlespeech.utils.argparse import strtobool
def _graph_connected_component(graph, node_id): def _graph_connected_component(graph, node_id):
""" """

@ -28,7 +28,8 @@ import re
import subprocess import subprocess
import numpy as np import numpy as np
from distutils.util import strtobool
from paddlespeech.utils.argparse import strtobool
FILE_IDS = re.compile(r"(?<=Speaker Diarization for).+(?=\*\*\*)") FILE_IDS = re.compile(r"(?<=Speaker Diarization for).+(?=\*\*\*)")
SCORED_SPEAKER_TIME = re.compile(r"(?<=SCORED SPEAKER TIME =)[\d.]+") SCORED_SPEAKER_TIME = re.compile(r"(?<=SCORED SPEAKER TIME =)[\d.]+")

@ -11,9 +11,10 @@ import json
import logging import logging
import sys import sys
from distutils.util import strtobool
from espnet.utils.cli_utils import get_commandline_args from espnet.utils.cli_utils import get_commandline_args
from paddlespeech.utils.argparse import strtobool
is_python2 = sys.version_info[0] == 2 is_python2 = sys.version_info[0] == 2

@ -4,13 +4,13 @@ import logging
import kaldiio import kaldiio
import numpy import numpy
from distutils.util import strtobool
from paddlespeech.audio.transform.cmvn import CMVN from paddlespeech.audio.transform.cmvn import CMVN
from paddlespeech.s2t.utils.cli_readers import file_reader_helper from paddlespeech.s2t.utils.cli_readers import file_reader_helper
from paddlespeech.s2t.utils.cli_utils import get_commandline_args from paddlespeech.s2t.utils.cli_utils import get_commandline_args
from paddlespeech.s2t.utils.cli_utils import is_scipy_wav_style from paddlespeech.s2t.utils.cli_utils import is_scipy_wav_style
from paddlespeech.s2t.utils.cli_writers import file_writer_helper from paddlespeech.s2t.utils.cli_writers import file_writer_helper
from paddlespeech.utils.argparse import strtobool
def get_parser(): def get_parser():

@ -2,13 +2,12 @@
import argparse import argparse
import logging import logging
from distutils.util import strtobool
from paddlespeech.audio.transform.transformation import Transformation from paddlespeech.audio.transform.transformation import Transformation
from paddlespeech.s2t.utils.cli_readers import file_reader_helper from paddlespeech.s2t.utils.cli_readers import file_reader_helper
from paddlespeech.s2t.utils.cli_utils import get_commandline_args from paddlespeech.s2t.utils.cli_utils import get_commandline_args
from paddlespeech.s2t.utils.cli_utils import is_scipy_wav_style from paddlespeech.s2t.utils.cli_utils import is_scipy_wav_style
from paddlespeech.s2t.utils.cli_writers import file_writer_helper from paddlespeech.s2t.utils.cli_writers import file_writer_helper
from paddlespeech.utils.argparse import strtobool
def get_parser(): def get_parser():

@ -7,9 +7,8 @@ import logging
import sys import sys
from io import open from io import open
from distutils.util import strtobool
from paddlespeech.s2t.utils.cli_utils import get_commandline_args from paddlespeech.s2t.utils.cli_utils import get_commandline_args
from paddlespeech.utils.argparse import strtobool
PY2 = sys.version_info[0] == 2 PY2 = sys.version_info[0] == 2
sys.stdin = codecs.getreader("utf-8")(sys.stdin if PY2 else sys.stdin.buffer) sys.stdin = codecs.getreader("utf-8")(sys.stdin if PY2 else sys.stdin.buffer)

Loading…
Cancel
Save