|
|
@ -96,6 +96,10 @@ def create_manifest(data_dir, manifest_path_prefix):
|
|
|
|
data_types = ['train', 'dev', 'test']
|
|
|
|
data_types = ['train', 'dev', 'test']
|
|
|
|
for dtype in data_types:
|
|
|
|
for dtype in data_types:
|
|
|
|
del json_lines[:]
|
|
|
|
del json_lines[:]
|
|
|
|
|
|
|
|
total_sec = 0.0
|
|
|
|
|
|
|
|
total_text = 0.0
|
|
|
|
|
|
|
|
total_num = 0
|
|
|
|
|
|
|
|
|
|
|
|
audio_dir = os.path.join(data_dir, dtype)
|
|
|
|
audio_dir = os.path.join(data_dir, dtype)
|
|
|
|
for subfolder, _, filelist in sorted(os.walk(audio_dir)):
|
|
|
|
for subfolder, _, filelist in sorted(os.walk(audio_dir)):
|
|
|
|
for fname in filelist:
|
|
|
|
for fname in filelist:
|
|
|
@ -125,11 +129,23 @@ def create_manifest(data_dir, manifest_path_prefix):
|
|
|
|
},
|
|
|
|
},
|
|
|
|
ensure_ascii=False))
|
|
|
|
ensure_ascii=False))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
total_sec += duration
|
|
|
|
|
|
|
|
total_text += len(text)
|
|
|
|
|
|
|
|
total_num += 1
|
|
|
|
|
|
|
|
|
|
|
|
manifest_path = manifest_path_prefix + '.' + dtype
|
|
|
|
manifest_path = manifest_path_prefix + '.' + dtype
|
|
|
|
with codecs.open(manifest_path, 'w', 'utf-8') as fout:
|
|
|
|
with codecs.open(manifest_path, 'w', 'utf-8') as fout:
|
|
|
|
for line in json_lines:
|
|
|
|
for line in json_lines:
|
|
|
|
fout.write(line + '\n')
|
|
|
|
fout.write(line + '\n')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
with open(dtype + '.meta', 'w') as f:
|
|
|
|
|
|
|
|
print(f"{dtype}:", file=f)
|
|
|
|
|
|
|
|
print(f"{total_num} utts", file=f)
|
|
|
|
|
|
|
|
print(f"{total_sec / (60*60)} h", file=f)
|
|
|
|
|
|
|
|
print(f"{total_text} text", file=f)
|
|
|
|
|
|
|
|
print(f"{total_text / total_sec} text/sec", file=f)
|
|
|
|
|
|
|
|
print(f"{total_sec / total_num} sec/utt", file=f)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def prepare_dataset(url, md5sum, target_dir, manifest_path, subset):
|
|
|
|
def prepare_dataset(url, md5sum, target_dir, manifest_path, subset):
|
|
|
|
"""Download, unpack and create manifest file."""
|
|
|
|
"""Download, unpack and create manifest file."""
|
|
|
|