parent 03a5750276
commit 9de5ad63f9

@@ -0,0 +1 @@
results

@@ -0,0 +1,5 @@
# [AMI](https://groups.inf.ed.ac.uk/ami/corpus/)

The AMI Meeting Corpus is a multi-modal data set consisting of 100 hours of meeting recordings. For a gentle introduction to the corpus, see the [corpus overview](https://groups.inf.ed.ac.uk/ami/corpus/overview.shtml). To access the data, follow the directions given [there](https://groups.inf.ed.ac.uk/ami/download). Around two-thirds of the data has been elicited using a scenario in which the participants play different roles in a design team, taking a design project from kick-off to completion over the course of a day. The rest consists of naturally occurring meetings in a range of domains.

Detailed information can be found in the [documentation section](http://groups.inf.ed.ac.uk/ami/corpus/datasets.shtml).
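
For example, once the corpus and annotations are downloaded, the preparation entry point added below can be called from Python along these lines (a minimal sketch; the import path and all directories are placeholders):

```python
from ami_prepare import prepare_ami

prepare_ami(
    data_folder="/path/to/amicorpus",
    manual_annot_folder="/path/to/ami_public_manual_1.6.2",
    save_folder="./results/",
    ref_rttm_dir="./results/ref_rttms",
    meta_data_dir="./results/metadata",
)
```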
@@ -0,0 +1,602 @@
"""
Data preparation.

Download: http://groups.inf.ed.ac.uk/ami/download/

Prepares metadata files (JSON) from manual annotations "segments/" using RTTM format (Oracle VAD).
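
A reference RTTM line written by this script looks like the following
(type, recording ID, channel, onset, duration, then the speaker ID among
<NA> placeholders; the example values are taken from the comments below):

    SPEAKER ES2008c 0 37.880 0.590 <NA> <NA> ES2008c.A_PM <NA> <NA>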

Authors
 * qingenz123@126.com (Qingen ZHAO) 2022

"""

import os
import logging
import argparse
import xml.etree.ElementTree as et
import glob
import json
from ami_splits import get_AMI_split
from distutils.util import strtobool

from utils.dataio import (
    load_pkl,
    save_pkl,
)

logger = logging.getLogger(__name__)
SAMPLERATE = 16000


def prepare_ami(
    data_folder,
    manual_annot_folder,
    save_folder,
    ref_rttm_dir,
    meta_data_dir,
    split_type="full_corpus_asr",
    skip_TNO=True,
    mic_type="Mix-Headset",
    vad_type="oracle",
    max_subseg_dur=3.0,
    overlap=1.5,
):
    """
    Prepares reference RTTM and JSON files for the AMI dataset.

    Arguments
    ---------
    data_folder : str
        Path to the folder where the original amicorpus is stored.
    manual_annot_folder : str
        Directory where the manual annotations are stored.
    save_folder : str
        The save directory in results.
    ref_rttm_dir : str
        Directory to store reference RTTM files.
    meta_data_dir : str
        Directory to store the meta data (json) files.
    split_type : str
        Standard dataset split. See ami_splits.py for more information.
        Allowed split_type: "scenario_only", "full_corpus" or "full_corpus_asr"
    skip_TNO : bool
        Skips TNO meeting recordings if True.
    mic_type : str
        Type of microphone to be used.
    vad_type : str
        Type of VAD. Kept for the future, when VAD support is added.
    max_subseg_dur : float
        Duration in seconds of the subsegments to be prepared from larger segments.
    overlap : float
        Overlap duration in seconds between adjacent subsegments.

    Example
    -------
    >>> from recipes.AMI.ami_prepare import prepare_ami
    >>> data_folder = '/network/datasets/ami/amicorpus/'
    >>> manual_annot_folder = '/home/mila/d/dawalatn/nauman/ami_public_manual/'
    >>> save_folder = 'results/save/'
    >>> ref_rttm_dir = 'results/ref_rttms/'
    >>> meta_data_dir = 'results/metadata/'
    >>> split_type = 'full_corpus_asr'
    >>> mic_type = 'Lapel'
    >>> prepare_ami(data_folder, manual_annot_folder, save_folder,
    ...     ref_rttm_dir, meta_data_dir, split_type=split_type, mic_type=mic_type)
    """

    # Meta files
    meta_files = [
        os.path.join(meta_data_dir, "ami_train." + mic_type + ".subsegs.json"),
        os.path.join(meta_data_dir, "ami_dev." + mic_type + ".subsegs.json"),
        os.path.join(meta_data_dir, "ami_eval." + mic_type + ".subsegs.json"),
    ]

    # Create configuration for easily skipping the data preparation stage
    conf = {
        "data_folder": data_folder,
        "save_folder": save_folder,
        "ref_rttm_dir": ref_rttm_dir,
        "meta_data_dir": meta_data_dir,
        "split_type": split_type,
        "skip_TNO": skip_TNO,
        "mic_type": mic_type,
        "vad": vad_type,
        "max_subseg_dur": max_subseg_dur,
        "overlap": overlap,
        "meta_files": meta_files,
    }

    if not os.path.exists(save_folder):
        os.makedirs(save_folder)

    # Set the output option file
    opt_file = "opt_ami_prepare." + mic_type + ".pkl"

    # Check if this phase is already done (if so, skip it)
    if skip(save_folder, conf, meta_files, opt_file):
        logger.info(
            "Skipping data preparation, as it was completed in a previous run."
        )
        return

    msg = "\tCreating meta-data file for the AMI Dataset..."
    logger.debug(msg)

    # Get the split
    train_set, dev_set, eval_set = get_AMI_split(split_type)

    # Prepare RTTM from XML (manual annotations) and store as ground truth
    # Create ref_RTTM directory
    if not os.path.exists(ref_rttm_dir):
        os.makedirs(ref_rttm_dir)

    # Create reference RTTM files
    splits = ["train", "dev", "eval"]
    split_sets = {"train": train_set, "dev": dev_set, "eval": eval_set}
    for i in splits:
        rttm_file = ref_rttm_dir + "/fullref_ami_" + i + ".rttm"
        prepare_segs_for_RTTM(
            split_sets[i],
            rttm_file,
            data_folder,
            manual_annot_folder,
            i,
            skip_TNO,
        )

    # Create meta_files for splits
    if not os.path.exists(meta_data_dir):
        os.makedirs(meta_data_dir)

    for i in splits:
        rttm_file = ref_rttm_dir + "/fullref_ami_" + i + ".rttm"
        meta_filename_prefix = "ami_" + i
        prepare_metadata(
            rttm_file,
            meta_data_dir,
            data_folder,
            meta_filename_prefix,
            max_subseg_dur,
            overlap,
            mic_type,
        )

    save_opt_file = os.path.join(save_folder, opt_file)
    save_pkl(conf, save_opt_file)


def get_RTTM_per_rec(segs, spkrs_list, rec_id):
    """Prepares the RTTM lines for one recording."""

    rttm = []

    # Prepare header
    for spkr_id in spkrs_list:
        # e.g. SPKR-INFO ES2008c 0 <NA> <NA> <NA> unknown ES2008c.A_PM <NA> <NA>
        line = (
            "SPKR-INFO "
            + rec_id
            + " 0 <NA> <NA> <NA> unknown "
            + spkr_id
            + " <NA> <NA>"
        )
        rttm.append(line)

    # Append the remaining lines
    for row in segs:
        # e.g. SPEAKER ES2008c 0 37.880 0.590 <NA> <NA> ES2008c.A_PM <NA> <NA>

        if float(row[1]) < float(row[0]):
            msg1 = (
                "Possibly incorrect annotation found: transcriber_start (%s) > transcriber_end (%s)"
                % (row[0], row[1])
            )
            msg2 = (
                "Excluding this incorrect row from the RTTM: %s, %s, %s, %s"
                % (
                    rec_id,
                    row[0],
                    str(round(float(row[1]) - float(row[0]), 4)),
                    str(row[2]),
                )
            )
            logger.info(msg1)
            logger.info(msg2)
            continue

        line = (
            "SPEAKER "
            + rec_id
            + " 0 "
            + str(round(float(row[0]), 4))
            + " "
            + str(round(float(row[1]) - float(row[0]), 4))
            + " <NA> <NA> "
            + str(row[2])
            + " <NA> <NA>"
        )
        rttm.append(line)

    return rttm


def prepare_segs_for_RTTM(
    list_ids, out_rttm_file, audio_dir, annot_dir, split_type, skip_TNO
):
    """Reads the "segments" XML annotations for the given meeting IDs and
    writes one combined reference RTTM file for the split.
    """

    RTTM = []  # Stores all RTTMs clubbed together for a given dataset split

    for main_meet_id in list_ids:

        # Skip TNO meetings from dev and eval sets
        if (
            main_meet_id.startswith("TS")
            and split_type != "train"
            and skip_TNO is True
        ):
            msg = (
                "Skipping TNO meeting in AMI "
                + str(split_type)
                + " set : "
                + str(main_meet_id)
            )
            logger.info(msg)
            continue

        list_sessions = glob.glob(audio_dir + "/" + main_meet_id + "*")
        list_sessions.sort()

        for sess in list_sessions:
            rec_id = os.path.basename(sess)
            path = annot_dir + "/segments/" + rec_id
            f = path + ".*.segments.xml"
            list_spkr_xmls = glob.glob(f)
            list_spkr_xmls.sort()  # A, B, C, D, E etc. (speakers)
            segs = []
            spkrs_list = []  # Non-scenario recordings contain 3-5 speakers

            for spkr_xml_file in list_spkr_xmls:

                # Speaker ID
                spkr = os.path.basename(spkr_xml_file).split(".")[1]
                spkr_ID = rec_id + "." + spkr
                spkrs_list.append(spkr_ID)

                # Parse xml tree
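                # Each per-speaker file (named like <rec_id>.<speaker>.segments.xml,
                # e.g. ES2008a.A.segments.xml) holds <segment> elements; only the
                # two timestamps and the speaker ID are used here. An element looks
                # roughly like this (attribute values hypothetical):
                #   <segment transcriber_start="37.88" transcriber_end="38.47" ...>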
                tree = et.parse(spkr_xml_file)
                root = tree.getroot()

                # Start, end and speaker_ID from the xml file
                segs = segs + [
                    [
                        elem.attrib["transcriber_start"],
                        elem.attrib["transcriber_end"],
                        spkr_ID,
                    ]
                    for elem in root.iter("segment")
                ]

            # Sort rows by start time (per recording)
            segs.sort(key=lambda x: float(x[0]))

            rttm_per_rec = get_RTTM_per_rec(segs, spkrs_list, rec_id)
            RTTM = RTTM + rttm_per_rec

    # Write one RTTM as ground truth, e.g. "fullref_ami_eval.rttm"
    with open(out_rttm_file, "w") as f:
        for item in RTTM:
            f.write("%s\n" % item)


def is_overlapped(end1, start2):
    """Returns True if the two segments overlap.

    Arguments
    ---------
    end1 : float
        End time of the first segment.
    start2 : float
        Start time of the second segment.
    """

    if start2 > end1:
        return False
    else:
        return True


def merge_rttm_intervals(rttm_segs):
    """Merges adjacent segments in rttm if they overlap.
    """
    # For one recording
    # rec_id = rttm_segs[0][1]
    rttm_segs.sort(key=lambda x: float(x[3]))

    # The first interval is taken as it is
    merged_segs = [rttm_segs[0]]
    strt = float(rttm_segs[0][3])
    end = float(rttm_segs[0][3]) + float(rttm_segs[0][4])

    for row in rttm_segs[1:]:
        s = float(row[3])
        e = float(row[3]) + float(row[4])

        if is_overlapped(end, s):
            # Update only the end; strt stays the same as in the last segment.
            # Just update the last row in merged_segs
            end = max(end, e)
            merged_segs[-1][3] = str(round(strt, 4))
            merged_segs[-1][4] = str(round((end - strt), 4))
            merged_segs[-1][7] = "overlap"  # previous_row[7] + '-' + row[7]
        else:
            # Add a new disjoint segment
            strt = s
            end = e
            merged_segs.append(row)  # this row has a single speaker ID

    return merged_segs


def get_subsegments(merged_segs, max_subseg_dur=3.0, overlap=1.5):
    """Divides bigger segments into smaller sub-segments.
|
||||||
|
"""
|
||||||
|
|
||||||
|
shift = max_subseg_dur - overlap
|
||||||
|
subsegments = []
|
||||||
|
|
||||||
|
# These rows are in RTTM format
|
||||||
|
for row in merged_segs:
|
||||||
|
seg_dur = float(row[4])
|
||||||
|
rec_id = row[1]
|
||||||
|
|
||||||
|
if seg_dur > max_subseg_dur:
|
||||||
|
num_subsegs = int(seg_dur / shift)
|
||||||
|
# Taking 0.01 sec as small step
|
||||||
|
seg_start = float(row[3])
|
||||||
|
seg_end = seg_start + seg_dur
|
||||||
|
|
||||||
|
# Now divide this segment (new_row) in smaller subsegments
|
||||||
|
for i in range(num_subsegs):
|
||||||
|
subseg_start = seg_start + i * shift
|
||||||
|
subseg_end = min(subseg_start + max_subseg_dur - 0.01, seg_end)
|
||||||
|
subseg_dur = subseg_end - subseg_start
|
||||||
|
|
||||||
|
new_row = [
|
||||||
|
"SPEAKER",
|
||||||
|
rec_id,
|
||||||
|
"0",
|
||||||
|
str(round(float(subseg_start), 4)),
|
||||||
|
str(round(float(subseg_dur), 4)),
|
||||||
|
"<NA>",
|
||||||
|
"<NA>",
|
||||||
|
row[7],
|
||||||
|
"<NA>",
|
||||||
|
"<NA>",
|
||||||
|
]
|
||||||
|
|
||||||
|
subsegments.append(new_row)
|
||||||
|
|
||||||
|
# Break if exceeding the boundary
|
||||||
|
if subseg_end >= seg_end:
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
subsegments.append(row)
|
||||||
|
|
||||||
|
return subsegments


def prepare_metadata(
    rttm_file, save_dir, data_dir, filename, max_subseg_dur, overlap, mic_type
):
    """Reads the reference RTTM, merges overlapping segments, divides them
    into sub-segments, and writes RTTM and JSON metadata files.
    """
    # Read the RTTM and get the unique meeting IDs (from the RTTM headers).
    # For each meeting ID: select it -> merge -> subsegment -> json -> append

    # Read RTTM
    RTTM = []
    with open(rttm_file, "r") as f:
        for line in f:
            entry = line[:-1]
            RTTM.append(entry)

    spkr_info = filter(lambda x: x.startswith("SPKR-INFO"), RTTM)
    rec_ids = list(set([row.split(" ")[1] for row in spkr_info]))
    rec_ids.sort()  # sorting just to make the JSON appear in proper sequence

    # For each recording, merge segments and then perform subsegmentation
    MERGED_SEGMENTS = []
    SUBSEGMENTS = []
    for rec_id in rec_ids:
        segs_iter = filter(
            lambda x: x.startswith("SPEAKER " + str(rec_id)), RTTM
        )
        gt_rttm_segs = [row.split(" ") for row in segs_iter]

        # Merge, subsegment and then convert to json format.
        merged_segs = merge_rttm_intervals(
            gt_rttm_segs
        )  # We lose speaker_ID after merging
        MERGED_SEGMENTS = MERGED_SEGMENTS + merged_segs

        # Divide segments into smaller sub-segments
        subsegs = get_subsegments(merged_segs, max_subseg_dur, overlap)
        SUBSEGMENTS = SUBSEGMENTS + subsegs

    # Write segments AND sub-segments (in RTTM format)
    segs_file = save_dir + "/" + filename + ".segments.rttm"
    subsegment_file = save_dir + "/" + filename + ".subsegments.rttm"

    with open(segs_file, "w") as f:
        for row in MERGED_SEGMENTS:
            line_str = " ".join(row)
            f.write("%s\n" % line_str)

    with open(subsegment_file, "w") as f:
        for row in SUBSEGMENTS:
            line_str = " ".join(row)
            f.write("%s\n" % line_str)

    # Create JSON from subsegments
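    # Each subsegment becomes one entry keyed by "<rec_id>_<start>_<end>".
    # For example, a 2.99 s subsegment of a (hypothetical) recording ES2011a
    # starting at 0.0 s with the default mic_type="Mix-Headset" becomes:
    #   "ES2011a_0.0_2.99": {
    #       "wav": {
    #           "file": "<data_dir>/ES2011a/audio/ES2011a.Mix-Headset.wav",
    #           "duration": 2.99,
    #           "start": 0,
    #           "stop": 47840
    #       }
    #   }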
|
||||||
|
json_dict = {}
|
||||||
|
for row in SUBSEGMENTS:
|
||||||
|
rec_id = row[1]
|
||||||
|
strt = str(round(float(row[3]), 4))
|
||||||
|
end = str(round((float(row[3]) + float(row[4])), 4))
|
||||||
|
subsegment_ID = rec_id + "_" + strt + "_" + end
|
||||||
|
dur = row[4]
|
||||||
|
start_sample = int(float(strt) * SAMPLERATE)
|
||||||
|
end_sample = int(float(end) * SAMPLERATE)
|
||||||
|
|
||||||
|
# If multi-mic audio is selected
|
||||||
|
if mic_type == "Array1":
|
||||||
|
wav_file_base_path = (
|
||||||
|
data_dir
|
||||||
|
+ "/"
|
||||||
|
+ rec_id
|
||||||
|
+ "/audio/"
|
||||||
|
+ rec_id
|
||||||
|
+ "."
|
||||||
|
+ mic_type
|
||||||
|
+ "-"
|
||||||
|
)
|
||||||
|
|
||||||
|
f = [] # adding all 8 mics
|
||||||
|
for i in range(8):
|
||||||
|
f.append(wav_file_base_path + str(i + 1).zfill(2) + ".wav")
|
||||||
|
audio_files_path_list = f
|
||||||
|
|
||||||
|
# Note: key "files" with 's' is used for multi-mic
|
||||||
|
json_dict[subsegment_ID] = {
|
||||||
|
"wav": {
|
||||||
|
"files": audio_files_path_list,
|
||||||
|
"duration": float(dur),
|
||||||
|
"start": int(start_sample),
|
||||||
|
"stop": int(end_sample),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
# Single mic audio
|
||||||
|
wav_file_path = (
|
||||||
|
data_dir
|
||||||
|
+ "/"
|
||||||
|
+ rec_id
|
||||||
|
+ "/audio/"
|
||||||
|
+ rec_id
|
||||||
|
+ "."
|
||||||
|
+ mic_type
|
||||||
|
+ ".wav"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Note: key "file" without 's' is used for single-mic
|
||||||
|
json_dict[subsegment_ID] = {
|
||||||
|
"wav": {
|
||||||
|
"file": wav_file_path,
|
||||||
|
"duration": float(dur),
|
||||||
|
"start": int(start_sample),
|
||||||
|
"stop": int(end_sample),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
out_json_file = save_dir + "/" + filename + "." + mic_type + ".subsegs.json"
|
||||||
|
with open(out_json_file, mode="w") as json_f:
|
||||||
|
json.dump(json_dict, json_f, indent=2)
|
||||||
|
|
||||||
|
msg = "%s JSON prepared" % (out_json_file)
|
||||||
|
logger.debug(msg)


def skip(save_folder, conf, meta_files, opt_file):
    """
    Detects whether the AMI data preparation has already been done.
    If the preparation has been done, we can skip it.

    Returns
    -------
    bool
        If True, the preparation phase can be skipped.
        If False, it must be done.
    """
    # Check if the meta (json) files are available
    skip = True
    for file_path in meta_files:
        if not os.path.isfile(file_path):
            skip = False

    # Check the saved options
    save_opt_file = os.path.join(save_folder, opt_file)
    if skip is True:
        if os.path.isfile(save_opt_file):
            opts_old = load_pkl(save_opt_file)
            if opts_old == conf:
                skip = True
            else:
                skip = False
        else:
            skip = False

    return skip


if __name__ == '__main__':

    parser = argparse.ArgumentParser(
        prog='python ami_prepare.py --data_folder /home/data/ami/amicorpus \
            --manual_annot_folder /home/data/ami/ami_public_manual_1.6.2 \
            --save_folder ./results/ --ref_rttm_dir ./results/ref_rttms \
            --meta_data_dir ./results/metadata',
        description='AMI Data preparation')
    parser.add_argument(
        '--data_folder', required=True,
        help='Path to the folder where the original amicorpus is stored')
    parser.add_argument(
        '--manual_annot_folder', required=True,
        help='Directory where the manual annotations are stored')
    parser.add_argument(
        '--save_folder', required=True, help='The save directory in results')
    parser.add_argument(
        '--ref_rttm_dir', required=True,
        help='Directory to store reference RTTM files')
    parser.add_argument(
        '--meta_data_dir', required=True,
        help='Directory to store the meta data (json) files')
    parser.add_argument(
        '--split_type',
        default="full_corpus_asr",
        help='Standard dataset split. See ami_splits.py for more information')
    parser.add_argument(
        '--skip_TNO', default=True, type=strtobool,
        help='Skips TNO meeting recordings if True')
    parser.add_argument(
        '--mic_type', default="Mix-Headset",
        help='Type of microphone to be used')
    parser.add_argument(
        '--vad_type', default="oracle",
        help='Type of VAD. Kept for future use, when VAD support is added')
    parser.add_argument(
        '--max_subseg_dur',
        default=3.0,
        type=float,
        help='Duration in seconds of the subsegments to be prepared from larger segments')
    parser.add_argument(
        '--overlap', default=1.5, type=float,
        help='Overlap duration in seconds between adjacent subsegments')

    args = parser.parse_args()
    print(args)

    prepare_ami(
        args.data_folder,
        args.manual_annot_folder,
        args.save_folder,
        args.ref_rttm_dir,
        args.meta_data_dir,
        split_type=args.split_type,
        skip_TNO=bool(args.skip_TNO),
        mic_type=args.mic_type,
        vad_type=args.vad_type,
        max_subseg_dur=args.max_subseg_dur,
        overlap=args.overlap,
    )
@ -0,0 +1,252 @@
|
|||||||
|
"""
|
||||||
|
AMI corpus contained 100 hours of meeting recording.
|
||||||
|
This script returns the standard train, dev and eval split for AMI corpus.
|
||||||
|
For more information on dataset please refer to http://groups.inf.ed.ac.uk/ami/corpus/datasets.shtml
|
||||||
|
|
||||||
|
Authors
|
||||||
|
* qingenz123@126.com (Qingen ZHAO) 2022
|
||||||
|
|
||||||
|
Credits
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
ALLOWED_OPTIONS = ["scenario_only", "full_corpus", "full_corpus_asr"]
|
||||||
|
|
||||||
|
|
||||||
|
def get_AMI_split(split_option):
|
||||||
|
"""
|
||||||
|
Prepares train, dev, and test sets for given split_option
|
||||||
|
|
||||||
|
Arguments
|
||||||
|
---------
|
||||||
|
split_option: str
|
||||||
|
The standard split option.
|
||||||
|
Allowed options: "scenario_only", "full_corpus", "full_corpus_asr"
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
Meeting IDs for train, dev, and test sets for given split_option
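
    A quick illustration using the scenario-only split defined below:

    Example
    -------
    >>> train_set, dev_set, test_set = get_AMI_split("scenario_only")
    >>> dev_set
    ['ES2003', 'ES2011', 'IS1008', 'TS3004', 'TS3006']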
|
||||||
|
"""
|
||||||
|
|
||||||
|
if split_option not in ALLOWED_OPTIONS:
|
||||||
|
print(
|
||||||
|
f'Invalid split "{split_option}" requested!\nValid split_options are: ',
|
||||||
|
ALLOWED_OPTIONS,
|
||||||
|
)
|
||||||
|
return

    if split_option == "scenario_only":

        train_set = [
            "ES2002",
            "ES2005",
            "ES2006",
            "ES2007",
            "ES2008",
            "ES2009",
            "ES2010",
            "ES2012",
            "ES2013",
            "ES2015",
            "ES2016",
            "IS1000",
            "IS1001",
            "IS1002",
            "IS1003",
            "IS1004",
            "IS1005",
            "IS1006",
            "IS1007",
            "TS3005",
            "TS3008",
            "TS3009",
            "TS3010",
            "TS3011",
            "TS3012",
        ]

        dev_set = [
            "ES2003",
            "ES2011",
            "IS1008",
            "TS3004",
            "TS3006",
        ]

        test_set = [
            "ES2004",
            "ES2014",
            "IS1009",
            "TS3003",
            "TS3007",
        ]

    if split_option == "full_corpus":
        # List of train: SA (TRAINING PART OF SEEN DATA)
        train_set = [
            "ES2002",
            "ES2005",
            "ES2006",
            "ES2007",
            "ES2008",
            "ES2009",
            "ES2010",
            "ES2012",
            "ES2013",
            "ES2015",
            "ES2016",
            "IS1000",
            "IS1001",
            "IS1002",
            "IS1003",
            "IS1004",
            "IS1005",
            "IS1006",
            "IS1007",
            "TS3005",
            "TS3008",
            "TS3009",
            "TS3010",
            "TS3011",
            "TS3012",
            "EN2001",
            "EN2003",
            "EN2004",
            "EN2005",
            "EN2006",
            "EN2009",
            "IN1001",
            "IN1002",
            "IN1005",
            "IN1007",
            "IN1008",
            "IN1009",
            "IN1012",
            "IN1013",
            "IN1014",
            "IN1016",
        ]

        # List of dev: SB (DEV PART OF SEEN DATA)
        dev_set = [
            "ES2003",
            "ES2011",
            "IS1008",
            "TS3004",
            "TS3006",
            "IB4001",
            "IB4002",
            "IB4003",
            "IB4004",
            "IB4010",
            "IB4011",
        ]

        # List of test: SC (UNSEEN DATA FOR EVALUATION)
        # Note that IB4005 does not appear because it has speakers in common with two sets of data.
        test_set = [
            "ES2004",
            "ES2014",
            "IS1009",
            "TS3003",
            "TS3007",
            "EN2002",
        ]

    if split_option == "full_corpus_asr":
        train_set = [
            "ES2002",
            "ES2003",
            "ES2005",
            "ES2006",
            "ES2007",
            "ES2008",
            "ES2009",
            "ES2010",
            "ES2012",
            "ES2013",
            "ES2014",
            "ES2015",
            "ES2016",
            "IS1000",
            "IS1001",
            "IS1002",
            "IS1003",
            "IS1004",
            "IS1005",
            "IS1006",
            "IS1007",
            "TS3005",
            "TS3006",
            "TS3007",
            "TS3008",
            "TS3009",
            "TS3010",
            "TS3011",
            "TS3012",
            "EN2001",
            "EN2003",
            "EN2004",
            "EN2005",
            "EN2006",
            "EN2009",
            "IN1001",
            "IN1002",
            "IN1005",
            "IN1007",
            "IN1008",
            "IN1009",
            "IN1012",
            "IN1013",
            "IN1014",
            "IN1016",
        ]

        # Alternative dev/test definitions, kept for reference (not returned)
        dev_set0 = [
            "ES2011",
            "IS1008",
            "TS3004",
            "IB4001",
            "IB4002",
            "IB4003",
            "IB4004",
            "IB4010",
            "IB4011",
        ]

        test_set0 = [
            "ES2004",
            "IS1009",
            "TS3003",
            "EN2002",
        ]

        # First-session-only variants, also kept for reference (not returned)
        dev_set1 = [
            "ES2011a",
            "IS1008a",
            "TS3004a",
            "IB4001",
            "IB4002",
            "IB4003",
            "IB4004",
        ]
        test_set1 = [
            "ES2004a",
            "IS1009a",
            "TS3003a",
            "EN2001a",
        ]

        # NOTE: the sets defined above are overridden below with small IB40xx
        # subsets, so only these are returned for "full_corpus_asr".
        train_set = [
            "IB4001",
            "IB4002",
            "IB4003",
            "IB4004",
        ]
        dev_set = [
            "IB4002",
        ]
        test_set = [
            "IB4004",
        ]

    return train_set, dev_set, test_set

@@ -0,0 +1,82 @@
"""
Data reading and writing.

Authors
 * qingenz123@126.com (Qingen ZHAO) 2022

"""

import os
import time
import pickle


def save_pkl(obj, file):
    """Save an object in pkl format.

    Arguments
    ---------
    obj : object
        Object to save in pkl format
    file : str
        Path to the output file

    Example
    -------
    >>> tmpfile = os.path.join(getfixture('tmpdir'), "example.pkl")
    >>> save_pkl([1, 2, 3, 4, 5], tmpfile)
    >>> load_pkl(tmpfile)
    [1, 2, 3, 4, 5]
    """
    with open(file, "wb") as f:
        pickle.dump(obj, f)


def load_pickle(pickle_path):
    """Utility function for loading .pkl pickle files.

    Arguments
    ---------
    pickle_path : str
        Path to pickle file.

    Returns
    -------
    out : object
        Python object loaded from pickle.
    """
    with open(pickle_path, "rb") as f:
        out = pickle.load(f)
    return out


def load_pkl(file):
    """Loads a pkl file.

    For an example, see `save_pkl`.

    Arguments
    ---------
    file : str
        Path to the input pkl file.

    Returns
    -------
    The loaded object.
    """

    # Deals with the situation where two processes are trying
    # to access the same label dictionary by creating a lock
    count = 100
    while count > 0:
        if os.path.isfile(file + ".lock"):
            time.sleep(1)
            count -= 1
        else:
            break

    try:
        open(file + ".lock", "w").close()
        with open(file, "rb") as f:
            return pickle.load(f)
    finally:
        if os.path.isfile(file + ".lock"):
            os.remove(file + ".lock")