|
|
|
@ -1,7 +1,8 @@
|
|
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
"""remove longshort data from manifest"""
|
|
|
|
|
import logging
|
|
|
|
|
import argparse
|
|
|
|
|
import logging
|
|
|
|
|
|
|
|
|
|
import jsonlines
|
|
|
|
|
|
|
|
|
|
from paddlespeech.s2t.utils.cli_utils import get_commandline_args
|
|
|
|
@ -23,17 +24,19 @@ def get_parser():
|
|
|
|
|
parser.add_argument(
|
|
|
|
|
"--verbose", "-V", default=0, type=int, help="Verbose option")
|
|
|
|
|
parser.add_argument(
|
|
|
|
|
"--iaxis", default=0, type=int, help="multi inputs index, 0 is the first")
|
|
|
|
|
parser.add_argument(
|
|
|
|
|
"--oaxis", default=0, type=int, help="multi outputs index, 0 is the first")
|
|
|
|
|
"--iaxis",
|
|
|
|
|
default=0,
|
|
|
|
|
type=int,
|
|
|
|
|
help="multi inputs index, 0 is the first")
|
|
|
|
|
parser.add_argument(
|
|
|
|
|
"--maxframes", default=2000, type=int, help="maxframes")
|
|
|
|
|
parser.add_argument(
|
|
|
|
|
"--minframes", default=10, type=int, help="minframes")
|
|
|
|
|
parser.add_argument(
|
|
|
|
|
"--maxchars", default=200, type=int, help="max tokens")
|
|
|
|
|
parser.add_argument(
|
|
|
|
|
"--minchars", default=0, type=int, help="min tokens")
|
|
|
|
|
"--oaxis",
|
|
|
|
|
default=0,
|
|
|
|
|
type=int,
|
|
|
|
|
help="multi outputs index, 0 is the first")
|
|
|
|
|
parser.add_argument("--maxframes", default=2000, type=int, help="maxframes")
|
|
|
|
|
parser.add_argument("--minframes", default=10, type=int, help="minframes")
|
|
|
|
|
parser.add_argument("--maxchars", default=200, type=int, help="max tokens")
|
|
|
|
|
parser.add_argument("--minchars", default=0, type=int, help="min tokens")
|
|
|
|
|
parser.add_argument(
|
|
|
|
|
"--stride_ms", default=10, type=int, help="stride in ms unit.")
|
|
|
|
|
parser.add_argument(
|
|
|
|
@ -83,7 +86,7 @@ def main():
|
|
|
|
|
lines = list(reader)
|
|
|
|
|
logging.info(f"Example: {len(lines)}")
|
|
|
|
|
feat = lines[0]['input'][args.iaxis]['feat']
|
|
|
|
|
args.soud = False
|
|
|
|
|
args.soud = False
|
|
|
|
|
if feat.split('.')[-1] not in 'ark, scp':
|
|
|
|
|
args.sound = True
|
|
|
|
|
|
|
|
|
@ -98,5 +101,6 @@ def main():
|
|
|
|
|
count += 1
|
|
|
|
|
logging.info(f"Example after filter: {count}\{filter}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
|
|
main()
|